2 /*--------------------------------------------------------------------*/
5 /*--------------------------------------------------------------------*/
8 This file is part of Callgrind, a Valgrind tool for call graph
11 Copyright (C) 2002-2017, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
13 This tool is derived from and contains code from Cachegrind
14 Copyright (C) 2002-2017 Nicholas Nethercote (njn@valgrind.org)
16 This program is free software; you can redistribute it and/or
17 modify it under the terms of the GNU General Public License as
18 published by the Free Software Foundation; either version 2 of the
19 License, or (at your option) any later version.
21 This program is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with this program; if not, see <http://www.gnu.org/licenses/>.
29 The GNU General Public License is contained in the file COPYING.
33 #include "callgrind.h"
36 #include "pub_tool_threadstate.h"
37 #include "pub_tool_gdbserver.h"
38 #include "pub_tool_transtab.h" // VG_(discard_translations_safely)
40 #include "cg_branchpred.c"
42 /*------------------------------------------------------------*/
43 /*--- Global variables ---*/
44 /*------------------------------------------------------------*/
// Tool-wide option record; this file reads its simulate_cache,
// simulate_branch and collect_bus fields.
47 CommandLineOptions
CLG_(clo
);
// Tool-wide statistics counters (zeroed by CLG_(init_statistics)).
48 Statistics
CLG_(stat
);
// Tested by CLG_(instrument) before any instrumentation is generated.
49 Bool
CLG_(instrument_state
) = True
; /* Instrumentation on ? */
51 /* thread and signal handler specific */
52 exec_state
CLG_(current_state
);
54 /* min of L1 and LL cache line sizes. This only gets set to a
55 non-zero value if we are doing cache simulation. */
56 Int
CLG_(min_line_size
) = 0;
59 /*------------------------------------------------------------*/
60 /*--- Statistics ---*/
61 /*------------------------------------------------------------*/
// Reset the counters of the Statistics record pointed to by s.
// Every assignment visible here stores 0: call/context counters,
// "distinct_*" object counts, hash/array/stack resize counts,
// debug-info BB counts and LRU miss counts.
63 static void CLG_(init_statistics
)(Statistics
* s
)
68 s
->rec_call_counter
= 0;
72 s
->context_counter
= 0;
73 s
->bb_retranslations
= 0;
76 s
->distinct_files
= 0;
78 s
->distinct_contexts
= 0;
80 s
->distinct_bbccs
= 0;
81 s
->distinct_instrs
= 0;
82 s
->distinct_skips
= 0;
84 s
->bb_hash_resizes
= 0;
85 s
->bbcc_hash_resizes
= 0;
86 s
->jcc_hash_resizes
= 0;
87 s
->cxt_hash_resizes
= 0;
88 s
->fn_array_resizes
= 0;
89 s
->call_stack_resizes
= 0;
90 s
->fn_stack_resizes
= 0;
92 s
->full_debug_BBs
= 0;
93 s
->file_line_debug_BBs
= 0;
94 s
->fn_name_debug_BBs
= 0;
96 s
->bbcc_lru_misses
= 0;
97 s
->jcc_lru_misses
= 0;
98 s
->cxt_lru_misses
= 0;
103 /*------------------------------------------------------------*/
104 /*--- Simple callbacks (not cache simulator) ---*/
105 /*------------------------------------------------------------*/
// Run-time helper for a global bus event on instruction ii
// (flushEvents passes its address as the helper for Ev_G events).
// Returns immediately when CLG_(current_state).collect is false.
// Otherwise asserts that the instruction's event set includes EG_BUS
// and increments the EG_BUS slot of the current state's cost array.
108 static void log_global_event(InstrInfo
* ii
)
112 CLG_DEBUG(6, "log_global_event: Ir %#lx/%u\n",
113 CLG_(bb_base
) + ii
->instr_offset
, ii
->instr_size
);
115 if (!CLG_(current_state
).collect
) return;
117 CLG_ASSERT( (ii
->eventset
->mask
& (1u<<EG_BUS
))>0 );
119 CLG_(current_state
).cost
[ fullOffset(EG_BUS
) ]++;
// Pick the per-event cost slot: the "skipped" cost of the nonskipped
// function when one is set, else this instruction's slot under cost_base.
// NOTE(review): the update through cost_Bus is not visible in this view.
121 if (CLG_(current_state
).nonskipped
)
122 cost_Bus
= CLG_(current_state
).nonskipped
->skipped
+ fullOffset(EG_BUS
);
124 cost_Bus
= CLG_(cost_base
) + ii
->cost_offset
+ ii
->eventset
->offset
[EG_BUS
];
129 /* For branches, we consult two different predictors, one which
130 predicts taken/untaken for conditional branches, and the other
131 which predicts the branch target address for indirect branches
132 (jump-to-register style ones). */
// Run-time helper for a conditional branch at instruction ii;
// taken is the branch outcome handed to the predictor.
// The predictor (do_cond_branch_predict) is consulted before the
// collect check, so it is called even when collection is off.
// When collecting, asserts that the event set includes EG_BC and
// increments the global cost slot at fullOffset(EG_BC).
135 void log_cond_branch(InstrInfo
* ii
, Word taken
)
141 CLG_DEBUG(6, "log_cond_branch: Ir %#lx, taken %ld\n",
142 CLG_(bb_base
) + ii
->instr_offset
, taken
);
// Only the lowest bit of the predictor result is kept.
144 miss
= 1 & do_cond_branch_predict(CLG_(bb_base
) + ii
->instr_offset
, taken
);
146 if (!CLG_(current_state
).collect
) return;
148 CLG_ASSERT( (ii
->eventset
->mask
& (1u<<EG_BC
))>0 );
150 if (CLG_(current_state
).nonskipped
)
151 cost_Bc
= CLG_(current_state
).nonskipped
->skipped
+ fullOffset(EG_BC
);
153 cost_Bc
= CLG_(cost_base
) + ii
->cost_offset
+ ii
->eventset
->offset
[EG_BC
];
155 fullOffset_Bc
= fullOffset(EG_BC
);
156 CLG_(current_state
).cost
[ fullOffset_Bc
]++;
// NOTE(review): the slot after the EG_BC base is presumably the
// mispredict counter, bumped only on a miss; the guarding condition
// is not visible in this view.
159 CLG_(current_state
).cost
[ fullOffset_Bc
+1 ]++;
// Run-time helper for an indirect branch at instruction ii;
// actual_dst is the destination handed to the predictor.
// The predictor (do_ind_branch_predict) is consulted before the
// collect check, so it is called even when collection is off.
// When collecting, asserts that the event set includes EG_BI and
// increments the global cost slot at fullOffset(EG_BI).
165 void log_ind_branch(InstrInfo
* ii
, UWord actual_dst
)
171 CLG_DEBUG(6, "log_ind_branch: Ir %#lx, dst %#lx\n",
172 CLG_(bb_base
) + ii
->instr_offset
, actual_dst
);
// Only the lowest bit of the predictor result is kept.
174 miss
= 1 & do_ind_branch_predict(CLG_(bb_base
) + ii
->instr_offset
, actual_dst
);
176 if (!CLG_(current_state
).collect
) return;
178 CLG_ASSERT( (ii
->eventset
->mask
& (1u<<EG_BI
))>0 );
180 if (CLG_(current_state
).nonskipped
)
181 cost_Bi
= CLG_(current_state
).nonskipped
->skipped
+ fullOffset(EG_BI
);
183 cost_Bi
= CLG_(cost_base
) + ii
->cost_offset
+ ii
->eventset
->offset
[EG_BI
];
185 fullOffset_Bi
= fullOffset(EG_BI
);
186 CLG_(current_state
).cost
[ fullOffset_Bi
]++;
// NOTE(review): the slot after the EG_BI base is presumably the
// mispredict counter, bumped only on a miss; the guarding condition
// is not visible in this view.
189 CLG_(current_state
).cost
[ fullOffset_Bi
+1 ]++;
194 /*------------------------------------------------------------*/
195 /*--- Instrumentation structures and event queue handling ---*/
196 /*------------------------------------------------------------*/
198 /* Maintain an ordered list of memory events which are outstanding, in
199 the sense that no IR has yet been generated to do the relevant
200 helper calls. The BB is scanned top to bottom and memory events
201 are added to the end of the list, merging with the most recent
202 notified event where possible (Dw immediately following Dr and
203 having the same size and EA can be merged).
205 This merging is done so that for architectures which have
206 load-op-store instructions (x86, amd64), the insn is treated as if
207 it makes just one memory reference (a modify), rather than two (a
208 read followed by a write at the same address).
210 At various points the list will need to be flushed, that is, IR
211 generated from it. That must happen before any possible exit from
212 the block (the end, or an IRStmt_Exit). Flushing also takes place
213 when there is no space to add a new event.
215 If we require the simulation statistics to be up to date with
216 respect to possible memory exceptions, then the list would have to
217 be flushed before each memory reference. That would however lose
218 performance by inhibiting event-merging during flushing.
220 Flushing the list consists of walking it start to end and emitting
221 instrumentation IR for each event, in the order in which they
222 appear. It may be possible to emit a single call for two adjacent
223 events in order to reduce the number of helper function calls made.
224 For example, it could well be profitable to handle two adjacent Ir
225 events with a single helper call. */
233 Ev_Ir
, // Instruction read
236 Ev_Dm
, // Data modify (read then write)
237 Ev_Bc
, // branch conditional
238 Ev_Bi
, // branch indirect (to unknown destination)
239 Ev_G
// Global bus event
263 IRAtom
* taken
; /* :: Ity_I1 */
// Zero-fill the Event pointed to by ev (all sizeof(Event) bytes).
274 static void init_Event ( Event
* ev
) {
275 VG_(memset
)(ev
, 0, sizeof(Event
));
// Return the effective-address atom stored in a data event.
// Handles the Ev_Dr, Ev_Dw and Ev_Dm variants; any other tag reaches
// the default case and fails tl_assert(0).
278 static IRAtom
* get_Event_dea ( Event
* ev
) {
280 case Ev_Dr
: return ev
->Ev
.Dr
.ea
;
281 case Ev_Dw
: return ev
->Ev
.Dw
.ea
;
282 case Ev_Dm
: return ev
->Ev
.Dm
.ea
;
283 default: tl_assert(0);
// Return the szB field (access size) stored in a data event.
// Handles the Ev_Dr, Ev_Dw and Ev_Dm variants; any other tag reaches
// the default case and fails tl_assert(0).
287 static Int
get_Event_dszB ( Event
* ev
) {
289 case Ev_Dr
: return ev
->Ev
.Dr
.szB
;
290 case Ev_Dw
: return ev
->Ev
.Dw
.szB
;
291 case Ev_Dm
: return ev
->Ev
.Dm
.szB
;
292 default: tl_assert(0);
297 /* Up to this many unnotified events are allowed. Number is
298 arbitrary. Larger numbers allow more event merging to occur, but
299 potentially induce more spilling due to extending live ranges of
300 address temporaries. */
304 /* A struct which holds all the running state during instrumentation.
305 Mostly to avoid passing loads of parameters everywhere. */
307 /* The current outstanding-memory-event list. */
308 Event events
[N_EVENTS
];
311 /* The array of InstrInfo's is part of BB struct. */
314 /* BB seen before (ie. re-instrumentation) */
317 /* Number InstrInfo bins 'used' so far. */
320 // current offset of guest instructions from BB start
323 /* The output SB being constructed. */
// Debug printer for a queued Event.
// Each variant prints a short tag (Ir, Dr, Dw, Dm, Bc, Bi, G) and the
// InstrInfo pointer via VG_(printf). Data events also print the
// instruction offset and access size, then the address expression via
// ppIRExpr; branch events print their guard/destination expression.
// NOTE(review): the dispatch on ev->tag is not visible in this view.
328 static void showEvent ( Event
* ev
)
332 VG_(printf
)("Ir (InstrInfo %p) at +%u\n",
333 ev
->inode
, ev
->inode
->instr_offset
);
336 VG_(printf
)("Dr (InstrInfo %p) at +%u %d EA=",
337 ev
->inode
, ev
->inode
->instr_offset
, ev
->Ev
.Dr
.szB
);
338 ppIRExpr(ev
->Ev
.Dr
.ea
);
342 VG_(printf
)("Dw (InstrInfo %p) at +%u %d EA=",
343 ev
->inode
, ev
->inode
->instr_offset
, ev
->Ev
.Dw
.szB
);
344 ppIRExpr(ev
->Ev
.Dw
.ea
);
348 VG_(printf
)("Dm (InstrInfo %p) at +%u %d EA=",
349 ev
->inode
, ev
->inode
->instr_offset
, ev
->Ev
.Dm
.szB
);
350 ppIRExpr(ev
->Ev
.Dm
.ea
);
354 VG_(printf
)("Bc %p GA=", ev
->inode
);
355 ppIRExpr(ev
->Ev
.Bc
.taken
);
359 VG_(printf
)("Bi %p DST=", ev
->inode
);
360 ppIRExpr(ev
->Ev
.Bi
.dst
);
364 VG_(printf
)("G %p\n", ev
->inode
);
372 /* Generate code for all outstanding memory events, and mark the queue
373 empty. Code is generated into clgs->sbOut, and this activity
374 'consumes' slots in clgs->bb. */
// Emit instrumentation for every queued event in clgs->events and
// empty the queue.
//
// Phase 1 (only when the BB has not been seen before): walk the queue
// and grow each instruction's event set. An Ir event installs
// CLG_(sets).base; the other events extend the set through
// CLG_(add_event_group).
//
// Phase 2: walk the queue again, choosing a helper name/address and
// argument vector per event. An Ir event may be merged with a
// following Dr, a following Dw/Dm, or one or two following Ir events.
// A dirty call is built with unsafeIRDirty_0_N and appended to
// clgs->sbOut. Events whose helper address is 0 are skipped.
//
// Finally clgs->events_used is reset to 0.
376 static void flushEvents ( ClgState
* clgs
)
378 Int i
, regparms
, inew
;
379 const HChar
* helperName
;
388 if (!clgs
->seen_before
) {
389 // extend event sets as needed
390 // available sets: D0 Dr
391 for(i
=0; i
<clgs
->events_used
; i
++) {
392 ev
= &clgs
->events
[i
];
395 // Ir event always is first for a guest instruction
396 CLG_ASSERT(ev
->inode
->eventset
== 0);
397 ev
->inode
->eventset
= CLG_(sets
).base
;
400 // extend event set by Dr counters
401 ev
->inode
->eventset
= CLG_(add_event_group
)(ev
->inode
->eventset
,
406 // extend event set by Dw counters
407 ev
->inode
->eventset
= CLG_(add_event_group
)(ev
->inode
->eventset
,
411 // extend event set by Bc counters
412 ev
->inode
->eventset
= CLG_(add_event_group
)(ev
->inode
->eventset
,
416 // extend event set by Bi counters
417 ev
->inode
->eventset
= CLG_(add_event_group
)(ev
->inode
->eventset
,
421 // extend event set by Bus counter
422 ev
->inode
->eventset
= CLG_(add_event_group
)(ev
->inode
->eventset
,
// The loop advances by assigning inew to i.
// NOTE(review): the assignments to inew are not visible in this view.
431 for(i
= 0; i
< clgs
->events_used
; i
= inew
) {
438 /* generate IR to notify event i and possibly the ones
439 immediately following it. */
440 tl_assert(i
>= 0 && i
< clgs
->events_used
);
// ev2/ev3 are the next one/two queued events, or NULL past the end.
442 ev
= &clgs
->events
[i
];
443 ev2
= ( i
< clgs
->events_used
-1 ? &clgs
->events
[i
+1] : NULL
);
444 ev3
= ( i
< clgs
->events_used
-2 ? &clgs
->events
[i
+2] : NULL
);
447 VG_(printf
)(" flush ");
451 i_node_expr
= mkIRExpr_HWord( (HWord
)ev
->inode
);
453 /* Decide on helper fn to call and args to pass it, and advance
455 Dm events have same effect as Dw events */
458 /* Merge an Ir with a following Dr. */
459 if (ev2
&& ev2
->tag
== Ev_Dr
) {
460 /* Why is this true? It's because we're merging an Ir
461 with a following Dr. The Ir derives from the
462 instruction's IMark and the Dr from data
463 references which follow it. In short it holds
464 because each insn starts with an IMark, hence an
465 Ev_Ir, and so these Dr must pertain to the
466 immediately preceding Ir. Same applies to analogous
467 assertions in the subsequent cases. */
468 tl_assert(ev2
->inode
== ev
->inode
);
469 helperName
= CLG_(cachesim
).log_1I1Dr_name
;
470 helperAddr
= CLG_(cachesim
).log_1I1Dr
;
471 argv
= mkIRExprVec_3( i_node_expr
,
473 mkIRExpr_HWord( get_Event_dszB(ev2
) ) );
477 /* Merge an Ir with a following Dw/Dm. */
479 if (ev2
&& (ev2
->tag
== Ev_Dw
|| ev2
->tag
== Ev_Dm
)) {
480 tl_assert(ev2
->inode
== ev
->inode
);
481 helperName
= CLG_(cachesim
).log_1I1Dw_name
;
482 helperAddr
= CLG_(cachesim
).log_1I1Dw
;
483 argv
= mkIRExprVec_3( i_node_expr
,
485 mkIRExpr_HWord( get_Event_dszB(ev2
) ) );
489 /* Merge an Ir with two following Irs. */
491 if (ev2
&& ev3
&& ev2
->tag
== Ev_Ir
&& ev3
->tag
== Ev_Ir
) {
492 helperName
= CLG_(cachesim
).log_3I0D_name
;
493 helperAddr
= CLG_(cachesim
).log_3I0D
;
494 argv
= mkIRExprVec_3( i_node_expr
,
495 mkIRExpr_HWord( (HWord
)ev2
->inode
),
496 mkIRExpr_HWord( (HWord
)ev3
->inode
) );
500 /* Merge an Ir with one following Ir. */
502 if (ev2
&& ev2
->tag
== Ev_Ir
) {
503 helperName
= CLG_(cachesim
).log_2I0D_name
;
504 helperAddr
= CLG_(cachesim
).log_2I0D
;
505 argv
= mkIRExprVec_2( i_node_expr
,
506 mkIRExpr_HWord( (HWord
)ev2
->inode
) );
510 /* No merging possible; emit as-is. */
512 helperName
= CLG_(cachesim
).log_1I0D_name
;
513 helperAddr
= CLG_(cachesim
).log_1I0D
;
514 argv
= mkIRExprVec_1( i_node_expr
);
520 /* Data read or modify */
521 helperName
= CLG_(cachesim
).log_0I1Dr_name
;
522 helperAddr
= CLG_(cachesim
).log_0I1Dr
;
523 argv
= mkIRExprVec_3( i_node_expr
,
525 mkIRExpr_HWord( get_Event_dszB(ev
) ) );
532 helperName
= CLG_(cachesim
).log_0I1Dw_name
;
533 helperAddr
= CLG_(cachesim
).log_0I1Dw
;
534 argv
= mkIRExprVec_3( i_node_expr
,
536 mkIRExpr_HWord( get_Event_dszB(ev
) ) );
541 /* Conditional branch */
542 helperName
= "log_cond_branch";
543 helperAddr
= &log_cond_branch
;
544 argv
= mkIRExprVec_2( i_node_expr
, ev
->Ev
.Bc
.taken
);
549 /* Branch to an unknown destination */
550 helperName
= "log_ind_branch";
551 helperAddr
= &log_ind_branch
;
552 argv
= mkIRExprVec_2( i_node_expr
, ev
->Ev
.Bi
.dst
);
557 /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
558 helperName
= "log_global_event";
559 helperAddr
= &log_global_event
;
560 argv
= mkIRExprVec_1( i_node_expr
);
570 VG_(printf
)(" merge ");
574 VG_(printf
)(" merge ");
578 VG_(printf
)(" call %s (%p)\n",
579 helperName
, helperAddr
);
582 /* helper could be unset depending on the simulator used */
583 if (helperAddr
== 0) continue;
585 /* Add the helper. */
586 tl_assert(helperName
);
587 tl_assert(helperAddr
);
589 di
= unsafeIRDirty_0_N( regparms
,
590 helperName
, VG_(fnptr_to_fnentry
)( helperAddr
),
592 addStmtToIRSB( clgs
->sbOut
, IRStmt_Dirty(di
) );
// Queue is now empty.
595 clgs
->events_used
= 0;
// Queue an instruction-read (Ir) event for inode.
// Asserts that a first-time BB has an inode with no event set yet.
// Does nothing further when cache simulation is off.
// NOTE(review): when the queue is full (events_used == N_EVENTS) it is
// presumably flushed first; that statement and the code filling the new
// slot are not visible in this view.
598 static void addEvent_Ir ( ClgState
* clgs
, InstrInfo
* inode
)
601 tl_assert(clgs
->seen_before
|| (inode
->eventset
== 0));
602 if (!CLG_(clo
).simulate_cache
) return;
604 if (clgs
->events_used
== N_EVENTS
)
606 tl_assert(clgs
->events_used
>= 0 && clgs
->events_used
< N_EVENTS
);
607 evt
= &clgs
->events
[clgs
->events_used
];
// Queue a data-read (Dr) event of datasize bytes at address atom ea
// for inode.
// Requires ea to be an IR atom and datasize >= 1. Does nothing further
// when cache simulation is off; otherwise datasize must not exceed
// CLG_(min_line_size).
// NOTE(review): when the queue is full (events_used == N_EVENTS) it is
// presumably flushed first; that statement is not visible in this view.
615 void addEvent_Dr ( ClgState
* clgs
, InstrInfo
* inode
, Int datasize
, IRAtom
* ea
)
618 tl_assert(isIRAtom(ea
));
619 tl_assert(datasize
>= 1);
620 if (!CLG_(clo
).simulate_cache
) return;
621 tl_assert(datasize
<= CLG_(min_line_size
));
623 if (clgs
->events_used
== N_EVENTS
)
625 tl_assert(clgs
->events_used
>= 0 && clgs
->events_used
< N_EVENTS
);
626 evt
= &clgs
->events
[clgs
->events_used
];
630 evt
->Ev
.Dr
.szB
= datasize
;
// Queue a data-write (Dw) event of datasize bytes at address atom ea
// for inode.
// Requires ea to be an IR atom and datasize >= 1. Does nothing further
// when cache simulation is off; otherwise datasize must not exceed
// CLG_(min_line_size).
// If the most recently queued event is a Dr with the same size, the
// same inode and an equal address atom (eqIRAtom), that event is
// retagged as Ev_Dm instead of adding a new one.
// NOTE(review): the early return after the merge and the flush on a
// full queue are not visible in this view.
636 void addEvent_Dw ( ClgState
* clgs
, InstrInfo
* inode
, Int datasize
, IRAtom
* ea
)
639 tl_assert(isIRAtom(ea
));
640 tl_assert(datasize
>= 1);
641 if (!CLG_(clo
).simulate_cache
) return;
642 tl_assert(datasize
<= CLG_(min_line_size
));
644 /* Is it possible to merge this write with the preceding read? */
645 if (clgs
->events_used
> 0) {
646 Event
* lastEvt
= &clgs
->events
[clgs
->events_used
-1];
647 if ( lastEvt
->tag
== Ev_Dr
648 && lastEvt
->Ev
.Dr
.szB
== datasize
649 && lastEvt
->inode
== inode
650 && eqIRAtom(lastEvt
->Ev
.Dr
.ea
, ea
))
652 lastEvt
->tag
= Ev_Dm
;
657 /* No. Add as normal. */
658 if (clgs
->events_used
== N_EVENTS
)
660 tl_assert(clgs
->events_used
>= 0 && clgs
->events_used
< N_EVENTS
);
661 evt
= &clgs
->events
[clgs
->events_used
];
665 evt
->Ev
.Dw
.szB
= datasize
;
// Emit instrumentation for a guarded load or store immediately rather
// than queueing it.
// Requires ea and guard to be IR atoms and datasize >= 1. Does nothing
// further when cache simulation is off; otherwise datasize must not
// exceed CLG_(min_line_size).
// The queue must be empty (events_used == 0) before the call is built.
// The helper is the 0I1Dw variant when isWrite is set, else 0I1Dr, and
// is called with (inode, ea, datasize). The dirty call is appended to
// clgs->sbOut.
// NOTE(review): the isWrite parameter declaration, the flush before the
// emptiness assertion and the attachment of guard to the dirty call are
// not visible in this view.
671 void addEvent_D_guarded ( ClgState
* clgs
, InstrInfo
* inode
,
672 Int datasize
, IRAtom
* ea
, IRAtom
* guard
,
675 tl_assert(isIRAtom(ea
));
677 tl_assert(isIRAtom(guard
));
678 tl_assert(datasize
>= 1);
679 if (!CLG_(clo
).simulate_cache
) return;
680 tl_assert(datasize
<= CLG_(min_line_size
));
682 /* Adding guarded memory actions and merging them with the existing
683 queue is too complex. Simply flush the queue and add this
684 action immediately. Since guarded loads and stores are pretty
685 rare, this is not thought likely to cause any noticeable
686 performance loss as a result of the loss of event-merging
688 tl_assert(clgs
->events_used
>= 0);
690 tl_assert(clgs
->events_used
== 0);
691 /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
693 const HChar
* helperName
;
698 i_node_expr
= mkIRExpr_HWord( (HWord
)inode
);
699 helperName
= isWrite
? CLG_(cachesim
).log_0I1Dw_name
700 : CLG_(cachesim
).log_0I1Dr_name
;
701 helperAddr
= isWrite
? CLG_(cachesim
).log_0I1Dw
702 : CLG_(cachesim
).log_0I1Dr
;
703 argv
= mkIRExprVec_3( i_node_expr
,
704 ea
, mkIRExpr_HWord( datasize
) );
706 di
= unsafeIRDirty_0_N(
708 helperName
, VG_(fnptr_to_fnentry
)( helperAddr
),
711 addStmtToIRSB( clgs
->sbOut
, IRStmt_Dirty(di
) );
// Queue a conditional-branch (Bc) event for inode.
// guard must be an IR atom whose type in the output SB is the
// register-word integer type (Ity_I32 when sizeof(RegWord) is 4, else
// Ity_I64). Does nothing further when branch simulation is off.
// The guard is stored as the event's "taken" expression.
// NOTE(review): when the queue is full (events_used == N_EVENTS) it is
// presumably flushed first; that statement is not visible in this view.
715 void addEvent_Bc ( ClgState
* clgs
, InstrInfo
* inode
, IRAtom
* guard
)
718 tl_assert(isIRAtom(guard
));
719 tl_assert(typeOfIRExpr(clgs
->sbOut
->tyenv
, guard
)
720 == (sizeof(RegWord
)==4 ? Ity_I32
: Ity_I64
));
721 if (!CLG_(clo
).simulate_branch
) return;
723 if (clgs
->events_used
== N_EVENTS
)
725 tl_assert(clgs
->events_used
>= 0 && clgs
->events_used
< N_EVENTS
);
726 evt
= &clgs
->events
[clgs
->events_used
];
730 evt
->Ev
.Bc
.taken
= guard
;
// Queue an indirect-branch (Bi) event for inode.
// whereTo must be an IR atom whose type in the output SB is the
// register-word integer type (Ity_I32 when sizeof(RegWord) is 4, else
// Ity_I64). Does nothing further when branch simulation is off.
// whereTo is stored as the event's destination expression.
// NOTE(review): when the queue is full (events_used == N_EVENTS) it is
// presumably flushed first; that statement is not visible in this view.
735 void addEvent_Bi ( ClgState
* clgs
, InstrInfo
* inode
, IRAtom
* whereTo
)
738 tl_assert(isIRAtom(whereTo
));
739 tl_assert(typeOfIRExpr(clgs
->sbOut
->tyenv
, whereTo
)
740 == (sizeof(RegWord
)==4 ? Ity_I32
: Ity_I64
));
741 if (!CLG_(clo
).simulate_branch
) return;
743 if (clgs
->events_used
== N_EVENTS
)
745 tl_assert(clgs
->events_used
>= 0 && clgs
->events_used
< N_EVENTS
);
746 evt
= &clgs
->events
[clgs
->events_used
];
750 evt
->Ev
.Bi
.dst
= whereTo
;
// Queue a global bus (G) event for inode.
// Does nothing when CLG_(clo).collect_bus is off.
// NOTE(review): when the queue is full (events_used == N_EVENTS) it is
// presumably flushed first; that statement and the code filling the new
// slot are not visible in this view.
755 void addEvent_G ( ClgState
* clgs
, InstrInfo
* inode
)
758 if (!CLG_(clo
).collect_bus
) return;
760 if (clgs
->events_used
== N_EVENTS
)
762 tl_assert(clgs
->events_used
>= 0 && clgs
->events_used
< N_EVENTS
);
763 evt
= &clgs
->events
[clgs
->events_used
];
770 /* Initialise or check (if already seen before) an InstrInfo for next insn.
771 We only can set instr_offset/instr_size here. The required event set and
772 resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest
773 instructions. The event set is extended as required on flush of the event
774 queue (when Dm events were determined), cost offsets are determined at
775 end of BB instrumentation. */
// Select the InstrInfo slot for the next guest instruction of the BB.
// clgs->ii_index must lie in [0, bb->instr_count); the slot is
// bb->instr[ii_index].
// For a BB seen before, the stored offset and size are checked against
// the current values; otherwise they are recorded in the slot.
// Afterwards the running instr_offset advances by instr_size and the
// distinct_instrs statistic is incremented.
// NOTE(review): the ii_index increment and the return statement are
// not visible in this view.
777 InstrInfo
* next_InstrInfo ( ClgState
* clgs
, UInt instr_size
)
780 tl_assert(clgs
->ii_index
>= 0);
781 tl_assert(clgs
->ii_index
< clgs
->bb
->instr_count
);
782 ii
= &clgs
->bb
->instr
[ clgs
->ii_index
];
784 if (clgs
->seen_before
) {
785 CLG_ASSERT(ii
->instr_offset
== clgs
->instr_offset
);
786 CLG_ASSERT(ii
->instr_size
== instr_size
);
789 ii
->instr_offset
= clgs
->instr_offset
;
790 ii
->instr_size
= instr_size
;
796 clgs
->instr_offset
+= instr_size
;
797 CLG_(stat
).distinct_instrs
++;
802 // return total number of cost values needed for this BB
// Assign (or, for a BB seen before, verify) the cost_offset of every
// InstrInfo in the BB.
// Requires bb->instr_count == ii_index. A running offset starts at 0
// and grows by each instruction's eventset->size, or by 0 when the
// instruction has no event set.
// NOTE(review): the return statement is not visible in this view;
// callers compare or store the result as bb->cost_count.
804 UInt
update_cost_offsets( ClgState
* clgs
)
808 UInt cost_offset
= 0;
810 CLG_ASSERT(clgs
->bb
->instr_count
== clgs
->ii_index
);
811 for(i
=0; i
<clgs
->ii_index
; i
++) {
812 ii
= &clgs
->bb
->instr
[i
];
813 if (clgs
->seen_before
) {
814 CLG_ASSERT(ii
->cost_offset
== cost_offset
);
816 ii
->cost_offset
= cost_offset
;
817 cost_offset
+= ii
->eventset
? ii
->eventset
->size
: 0;
823 /*------------------------------------------------------------*/
824 /*--- Instrumentation ---*/
825 /*------------------------------------------------------------*/
827 #if defined(VG_BIGENDIAN)
828 # define CLGEndness Iend_BE
829 #elif defined(VG_LITTLEENDIAN)
830 # define CLGEndness Iend_LE
832 # error "Unknown endianness"
// Convert an IR constant to an Addr.
// With a 4-byte RegWord the constant must be tagged Ico_U32; with an
// 8-byte RegWord it must be tagged Ico_U64. Any other word size ends
// in VG_(tool_panic).
// NOTE(review): the value extraction and return are not visible in
// this view.
836 Addr
IRConst2Addr(IRConst
* con
)
840 if (sizeof(RegWord
) == 4) {
841 CLG_ASSERT( con
->tag
== Ico_U32
);
844 else if (sizeof(RegWord
) == 8) {
845 CLG_ASSERT( con
->tag
== Ico_U64
);
849 VG_(tool_panic
)("Callgrind: invalid Addr type");
854 /* First pass over a BB to instrument, counting instructions and jumps
855 * This is needed for the size of the BB struct to allocate
857 * Called from CLG_(get_bb)
// Scan the statements of sbIn and report block information through the
// INOUT pointers instrs, cjmps and cjmp_inverted.
// Each IMark records the instruction address and length. Statements
// before the first IMark (the preamble) are skipped. For each Exit
// statement, toNextInstr records whether its destination equals the
// address directly after the current instruction. The final value of
// toNextInstr is stored in *cjmp_inverted.
// NOTE(review): the updates of *instrs and *cjmps and the clearing of
// inPreamble are not visible in this view.
859 void CLG_(collectBlockInfo
)(IRSB
* sbIn
,
860 /*INOUT*/ UInt
* instrs
,
861 /*INOUT*/ UInt
* cjmps
,
862 /*INOUT*/ Bool
* cjmp_inverted
)
866 Addr instrAddr
=0, jumpDst
;
868 Bool toNextInstr
= False
;
870 // Ist_Exit has to be ignored in preamble code, before first IMark:
871 // preamble code is added by VEX for self modifying code, and has
872 // nothing to do with client code
873 Bool inPreamble
= True
;
877 for (i
= 0; i
< sbIn
->stmts_used
; i
++) {
879 if (Ist_IMark
== st
->tag
) {
882 instrAddr
= st
->Ist
.IMark
.addr
;
883 instrLen
= st
->Ist
.IMark
.len
;
888 if (inPreamble
) continue;
889 if (Ist_Exit
== st
->tag
) {
890 jumpDst
= IRConst2Addr(st
->Ist
.Exit
.dst
);
891 toNextInstr
= (jumpDst
== instrAddr
+ instrLen
);
897 /* if the last instructions of BB conditionally jumps to next instruction
898 * (= first instruction of next BB in memory), this is a inverted by VEX.
900 *cjmp_inverted
= toNextInstr
;
// Append to bbOut a store of the constant val to the constant address
// addr, using the CLGEndness byte order.
// The address constant is built as U32 when hWordTy is Ity_I32 and as
// U64 otherwise. The stored value is always a U32 constant.
904 void addConstMemStoreStmt( IRSB
* bbOut
, UWord addr
, UInt val
, IRType hWordTy
)
906 addStmtToIRSB( bbOut
,
907 IRStmt_Store(CLGEndness
,
908 IRExpr_Const(hWordTy
== Ity_I32
?
909 IRConst_U32( addr
) :
910 IRConst_U64( addr
)),
911 IRExpr_Const(IRConst_U32(val
)) ));
915 /* add helper call to setup_bbcc, with pointer to BB struct as argument
917 * precondition for setup_bbcc:
918 * - jmps_passed has number of cond.jumps passed in last executed BB
919 * - current_bbcc has a pointer to the BBCC of the last executed BB
920 * Thus, if bbcc_jmpkind is != -1 (JmpNone),
921 * current_bbcc->bb->jmp_addr
922 * gives the address of the jump source.
924 * the setup does 2 things:
926 * * Unwind own call stack, i.e sync our ESP with real ESP
927 * This is for ESP manipulation (longjmps, C++ exec handling) and RET
928 * * For CALLs or JMPs crossing objects, record call arg +
929 * push are on own call stack
931 * - prepare for cache log functions:
932 * set current_bbcc to BBCC that gets the costs for this BB execution
// Append to clgs->sbOut a dirty helper call to CLG_(setup_bbcc).
// The call is named "setup_bbcc", uses 1 regparm and takes the BB
// pointer clgs->bb (as a host word) as its single argument.
936 void addBBSetupCall(ClgState
* clgs
)
939 IRExpr
*arg1
, **argv
;
941 arg1
= mkIRExpr_HWord( (HWord
)clgs
->bb
);
942 argv
= mkIRExprVec_1(arg1
);
943 di
= unsafeIRDirty_0_N( 1, "setup_bbcc",
944 VG_(fnptr_to_fnentry
)( & CLG_(setup_bbcc
) ),
946 addStmtToIRSB( clgs
->sbOut
, IRStmt_Dirty(di
) );
951 IRSB
* CLG_(instrument
)( VgCallbackClosure
* closure
,
953 const VexGuestLayout
* layout
,
954 const VexGuestExtents
* vge
,
955 const VexArchInfo
* archinfo_host
,
956 IRType gWordTy
, IRType hWordTy
)
961 InstrInfo
* curr_inode
= NULL
;
964 IRTypeEnv
* tyenv
= sbIn
->tyenv
;
966 if (gWordTy
!= hWordTy
) {
967 /* We don't currently support this case. */
968 VG_(tool_panic
)("host/guest word size mismatch");
971 // No instrumentation if it is switched off
972 if (! CLG_(instrument_state
)) {
973 CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n",
974 (Addr
)closure
->readdr
);
978 CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr
)closure
->readdr
);
980 /* Set up SB for instrumented IR */
981 clgs
.sbOut
= deepCopyIRSBExceptStmts(sbIn
);
983 // Copy verbatim any IR preamble preceding the first IMark
985 while (i
< sbIn
->stmts_used
&& sbIn
->stmts
[i
]->tag
!= Ist_IMark
) {
986 addStmtToIRSB( clgs
.sbOut
, sbIn
->stmts
[i
] );
990 // Get the first statement, and origAddr from it
991 CLG_ASSERT(sbIn
->stmts_used
>0);
992 CLG_ASSERT(i
< sbIn
->stmts_used
);
994 CLG_ASSERT(Ist_IMark
== st
->tag
);
996 origAddr
= st
->Ist
.IMark
.addr
+ st
->Ist
.IMark
.delta
;
997 CLG_ASSERT(origAddr
== st
->Ist
.IMark
.addr
998 + st
->Ist
.IMark
.delta
); // XXX: check no overflow
1000 /* Get BB struct (creating if necessary).
1001 * JS: The hash table is keyed with orig_addr_noredir -- important!
1002 * JW: Why? If it is because of different chasing of the redirection,
1003 * this is not needed, as chasing is switched off in callgrind
1005 clgs
.bb
= CLG_(get_bb
)(origAddr
, sbIn
, &(clgs
.seen_before
));
1007 addBBSetupCall(&clgs
);
1009 // Set up running state
1010 clgs
.events_used
= 0;
1012 clgs
.instr_offset
= 0;
1014 for (/*use current i*/; i
< sbIn
->stmts_used
; i
++) {
1016 st
= sbIn
->stmts
[i
];
1017 CLG_ASSERT(isFlatIRStmt(st
));
1028 Addr cia
= st
->Ist
.IMark
.addr
+ st
->Ist
.IMark
.delta
;
1029 UInt isize
= st
->Ist
.IMark
.len
;
1030 CLG_ASSERT(clgs
.instr_offset
== cia
- origAddr
);
1031 // If Vex fails to decode an instruction, the size will be zero.
1032 // Pretend otherwise.
1033 if (isize
== 0) isize
= VG_MIN_INSTR_SZB
;
1035 // Sanity-check size.
1036 tl_assert( (VG_MIN_INSTR_SZB
<= isize
&& isize
<= VG_MAX_INSTR_SZB
)
1037 || VG_CLREQ_SZB
== isize
);
1039 // Init the inode, record it as the current one.
1040 // Subsequent Dr/Dw/Dm events from the same instruction will
1042 curr_inode
= next_InstrInfo (&clgs
, isize
);
1044 addEvent_Ir( &clgs
, curr_inode
);
1049 IRExpr
* data
= st
->Ist
.WrTmp
.data
;
1050 if (data
->tag
== Iex_Load
) {
1051 IRExpr
* aexpr
= data
->Iex
.Load
.addr
;
1052 // Note also, endianness info is ignored. I guess
1053 // that's not interesting.
1054 addEvent_Dr( &clgs
, curr_inode
,
1055 sizeofIRType(data
->Iex
.Load
.ty
), aexpr
);
1061 IRExpr
* data
= st
->Ist
.Store
.data
;
1062 IRExpr
* aexpr
= st
->Ist
.Store
.addr
;
1063 addEvent_Dw( &clgs
, curr_inode
,
1064 sizeofIRType(typeOfIRExpr(sbIn
->tyenv
, data
)), aexpr
);
1069 IRStoreG
* sg
= st
->Ist
.StoreG
.details
;
1070 IRExpr
* data
= sg
->data
;
1071 IRExpr
* addr
= sg
->addr
;
1072 IRType type
= typeOfIRExpr(tyenv
, data
);
1073 tl_assert(type
!= Ity_INVALID
);
1074 addEvent_D_guarded( &clgs
, curr_inode
,
1075 sizeofIRType(type
), addr
, sg
->guard
,
1081 IRLoadG
* lg
= st
->Ist
.LoadG
.details
;
1082 IRType type
= Ity_INVALID
; /* loaded type */
1083 IRType typeWide
= Ity_INVALID
; /* after implicit widening */
1084 IRExpr
* addr
= lg
->addr
;
1085 typeOfIRLoadGOp(lg
->cvt
, &typeWide
, &type
);
1086 tl_assert(type
!= Ity_INVALID
);
1087 addEvent_D_guarded( &clgs
, curr_inode
,
1088 sizeofIRType(type
), addr
, lg
->guard
,
1089 False
/*!isWrite*/ );
1095 IRDirty
* d
= st
->Ist
.Dirty
.details
;
1096 if (d
->mFx
!= Ifx_None
) {
1097 /* This dirty helper accesses memory. Collect the details. */
1098 tl_assert(d
->mAddr
!= NULL
);
1099 tl_assert(d
->mSize
!= 0);
1100 dataSize
= d
->mSize
;
1101 // Large (eg. 28B, 108B, 512B on x86) data-sized
1102 // instructions will be done inaccurately, but they're
1103 // very rare and this avoids errors from hitting more
1104 // than two cache lines in the simulation.
1105 if (CLG_(clo
).simulate_cache
&& dataSize
> CLG_(min_line_size
))
1106 dataSize
= CLG_(min_line_size
);
1107 if (d
->mFx
== Ifx_Read
|| d
->mFx
== Ifx_Modify
)
1108 addEvent_Dr( &clgs
, curr_inode
, dataSize
, d
->mAddr
);
1109 if (d
->mFx
== Ifx_Write
|| d
->mFx
== Ifx_Modify
)
1110 addEvent_Dw( &clgs
, curr_inode
, dataSize
, d
->mAddr
);
1112 tl_assert(d
->mAddr
== NULL
);
1113 tl_assert(d
->mSize
== 0);
1119 /* We treat it as a read and a write of the location. I
1120 think that is the same behaviour as it was before IRCAS
1121 was introduced, since prior to that point, the Vex
1122 front ends would translate a lock-prefixed instruction
1123 into a (normal) read followed by a (normal) write. */
1125 IRCAS
* cas
= st
->Ist
.CAS
.details
;
1126 CLG_ASSERT(cas
->addr
&& isIRAtom(cas
->addr
));
1127 CLG_ASSERT(cas
->dataLo
);
1128 dataSize
= sizeofIRType(typeOfIRExpr(sbIn
->tyenv
, cas
->dataLo
));
1129 if (cas
->dataHi
!= NULL
)
1130 dataSize
*= 2; /* since this is a doubleword-cas */
1131 addEvent_Dr( &clgs
, curr_inode
, dataSize
, cas
->addr
);
1132 addEvent_Dw( &clgs
, curr_inode
, dataSize
, cas
->addr
);
1133 addEvent_G( &clgs
, curr_inode
);
1139 if (st
->Ist
.LLSC
.storedata
== NULL
) {
1141 dataTy
= typeOfIRTemp(sbIn
->tyenv
, st
->Ist
.LLSC
.result
);
1142 addEvent_Dr( &clgs
, curr_inode
,
1143 sizeofIRType(dataTy
), st
->Ist
.LLSC
.addr
);
1144 /* flush events before LL, should help SC to succeed */
1145 flushEvents( &clgs
);
1148 dataTy
= typeOfIRExpr(sbIn
->tyenv
, st
->Ist
.LLSC
.storedata
);
1149 addEvent_Dw( &clgs
, curr_inode
,
1150 sizeofIRType(dataTy
), st
->Ist
.LLSC
.addr
);
1151 /* I don't know whether the global-bus-lock cost should
1152 be attributed to the LL or the SC, but it doesn't
1153 really matter since they always have to be used in
1154 pairs anyway. Hence put it (quite arbitrarily) on
1156 addEvent_G( &clgs
, curr_inode
);
1162 Bool guest_exit
, inverted
;
1164 /* VEX code generation sometimes inverts conditional branches.
1165 * As Callgrind counts (conditional) jumps, it has to correct
1166 * inversions. The heuristic is the following:
1167 * (1) Callgrind switches off SB chasing and unrolling, and
1168 * therefore it assumes that a candidate for inversion only is
1169 * the last conditional branch in an SB.
1170 * (2) inversion is assumed if the branch jumps to the address of
1171 * the next guest instruction in memory.
1172 * This heuristic is precalculated in CLG_(collectBlockInfo)().
1174 * Branching behavior is also used for branch prediction. Note that
1175 * above heuristic is different from what Cachegrind does.
1176 * Cachegrind uses (2) for all branches.
1178 if (cJumps
+1 == clgs
.bb
->cjmp_count
)
1179 inverted
= clgs
.bb
->cjmp_inverted
;
1183 // call branch predictor only if this is a branch in guest code
1184 guest_exit
= (st
->Ist
.Exit
.jk
== Ijk_Boring
) ||
1185 (st
->Ist
.Exit
.jk
== Ijk_Call
) ||
1186 (st
->Ist
.Exit
.jk
== Ijk_Ret
);
1189 /* Stuff to widen the guard expression to a host word, so
1190 we can pass it to the branch predictor simulation
1191 functions easily. */
1192 IRType tyW
= hWordTy
;
1193 IROp widen
= tyW
==Ity_I32
? Iop_1Uto32
: Iop_1Uto64
;
1194 IROp opXOR
= tyW
==Ity_I32
? Iop_Xor32
: Iop_Xor64
;
1195 IRTemp guard1
= newIRTemp(clgs
.sbOut
->tyenv
, Ity_I1
);
1196 IRTemp guardW
= newIRTemp(clgs
.sbOut
->tyenv
, tyW
);
1197 IRTemp guard
= newIRTemp(clgs
.sbOut
->tyenv
, tyW
);
1198 IRExpr
* one
= tyW
==Ity_I32
? IRExpr_Const(IRConst_U32(1))
1199 : IRExpr_Const(IRConst_U64(1));
1201 /* Widen the guard expression. */
1202 addStmtToIRSB( clgs
.sbOut
,
1203 IRStmt_WrTmp( guard1
, st
->Ist
.Exit
.guard
));
1204 addStmtToIRSB( clgs
.sbOut
,
1205 IRStmt_WrTmp( guardW
,
1207 IRExpr_RdTmp(guard1
))) );
1208 /* If the exit is inverted, invert the sense of the guard. */
1213 inverted
? IRExpr_Binop(opXOR
, IRExpr_RdTmp(guardW
), one
)
1214 : IRExpr_RdTmp(guardW
)
1216 /* And post the event. */
1217 addEvent_Bc( &clgs
, curr_inode
, IRExpr_RdTmp(guard
) );
1220 /* We may never reach the next statement, so need to flush
1221 all outstanding transactions now. */
1222 flushEvents( &clgs
);
1224 CLG_ASSERT(clgs
.ii_index
>0);
1225 if (!clgs
.seen_before
) {
1228 if (st
->Ist
.Exit
.jk
== Ijk_Call
) jk
= jk_Call
;
1229 else if (st
->Ist
.Exit
.jk
== Ijk_Ret
) jk
= jk_Return
;
1231 if (IRConst2Addr(st
->Ist
.Exit
.dst
) ==
1232 origAddr
+ curr_inode
->instr_offset
+ curr_inode
->instr_size
)
1238 clgs
.bb
->jmp
[cJumps
].instr
= clgs
.ii_index
-1;
1239 clgs
.bb
->jmp
[cJumps
].jmpkind
= jk
;
1242 /* Update global variable jmps_passed before the jump
1243 * A correction is needed if VEX inverted the last jump condition
1245 UInt val
= inverted
? cJumps
+1 : cJumps
;
1246 addConstMemStoreStmt( clgs
.sbOut
,
1247 (UWord
) &CLG_(current_state
).jmps_passed
,
1259 /* Copy the original statement */
1260 addStmtToIRSB( clgs
.sbOut
, st
);
1263 VG_(printf
)(" pass ");
1269 /* Deal with branches to unknown destinations. Except ignore ones
1270 which are function returns as we assume the return stack
1271 predictor never mispredicts. */
1272 if ((sbIn
->jumpkind
== Ijk_Boring
) || (sbIn
->jumpkind
== Ijk_Call
)) {
1273 if (0) { ppIRExpr( sbIn
->next
); VG_(printf
)("\n"); }
1274 switch (sbIn
->next
->tag
) {
1276 break; /* boring - branch to known address */
1278 /* looks like an indirect branch (branch to unknown) */
1279 addEvent_Bi( &clgs
, curr_inode
, sbIn
->next
);
1282 /* shouldn't happen - if the incoming IR is properly
1283 flattened, should only have tmp and const cases to
1289 /* At the end of the bb. Flush outstandings. */
1290 flushEvents( &clgs
);
1292 /* Update global variable jmps_passed at end of SB.
1293 * As CLG_(current_state).jmps_passed is reset to 0 in setup_bbcc,
1294 * this can be omitted if there is no conditional jump in this SB.
1295 * A correction is needed if VEX inverted the last jump condition
1298 UInt jmps_passed
= cJumps
;
1299 if (clgs
.bb
->cjmp_inverted
) jmps_passed
--;
1300 addConstMemStoreStmt( clgs
.sbOut
,
1301 (UWord
) &CLG_(current_state
).jmps_passed
,
1302 jmps_passed
, hWordTy
);
1304 CLG_ASSERT(clgs
.bb
->cjmp_count
== cJumps
);
1305 CLG_ASSERT(clgs
.bb
->instr_count
== clgs
.ii_index
);
1307 /* Info for final exit from BB */
1311 if (sbIn
->jumpkind
== Ijk_Call
) jk
= jk_Call
;
1312 else if (sbIn
->jumpkind
== Ijk_Ret
) jk
= jk_Return
;
1315 if ((sbIn
->next
->tag
== Iex_Const
) &&
1316 (IRConst2Addr(sbIn
->next
->Iex
.Const
.con
) ==
1317 origAddr
+ clgs
.instr_offset
))
1320 clgs
.bb
->jmp
[cJumps
].jmpkind
= jk
;
1321 /* Instruction index of the call/ret at BB end
1322 * (it is wrong for fall-through, but does not matter) */
1323 clgs
.bb
->jmp
[cJumps
].instr
= clgs
.ii_index
-1;
1326 /* swap information of last exit with final exit if inverted */
1327 if (clgs
.bb
->cjmp_inverted
) {
1331 jk
= clgs
.bb
->jmp
[cJumps
].jmpkind
;
1332 clgs
.bb
->jmp
[cJumps
].jmpkind
= clgs
.bb
->jmp
[cJumps
-1].jmpkind
;
1333 clgs
.bb
->jmp
[cJumps
-1].jmpkind
= jk
;
1334 instr
= clgs
.bb
->jmp
[cJumps
].instr
;
1335 clgs
.bb
->jmp
[cJumps
].instr
= clgs
.bb
->jmp
[cJumps
-1].instr
;
1336 clgs
.bb
->jmp
[cJumps
-1].instr
= instr
;
1339 if (clgs
.seen_before
) {
1340 CLG_ASSERT(clgs
.bb
->cost_count
== update_cost_offsets(&clgs
));
1341 CLG_ASSERT(clgs
.bb
->instr_len
== clgs
.instr_offset
);
1344 clgs
.bb
->cost_count
= update_cost_offsets(&clgs
);
1345 clgs
.bb
->instr_len
= clgs
.instr_offset
;
1348 CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n",
1349 origAddr
, clgs
.bb
->instr_len
,
1350 clgs
.bb
->cjmp_count
, clgs
.bb
->cost_count
);
1352 CLG_DEBUG(3, " [ ");
1353 for (i
=0;i
<cJumps
;i
++)
1354 CLG_DEBUG(3, "%u ", clgs
.bb
->jmp
[i
].instr
);
1355 CLG_DEBUG(3, "], last inverted: %s \n",
1356 clgs
.bb
->cjmp_inverted
? "yes":"no");
1362 /*--------------------------------------------------------------------*/
1363 /*--- Discarding BB info ---*/
1364 /*--------------------------------------------------------------------*/
1366 // Called when a translation is removed from the translation cache for
1367 // any reason at all: to free up space, because the guest code was
1368 // unmapped or modified, or for any arbitrary reason.
1370 void clg_discard_superblock_info ( Addr orig_addr
, VexGuestExtents vge
)
1372 tl_assert(vge
.n_used
> 0);
1375 VG_(printf
)( "discard_superblock_info: %p, %p, %llu\n",
1377 (void*)vge
.base
[0], (ULong
)vge
.len
[0]);
1379 // Get BB info, remove from table, free BB info. Simple!
1380 // When created, the BB is keyed by the first instruction address,
1381 // (not orig_addr, but eventually redirected address). Thus, we
1382 // use the first instruction address in vge.
1383 CLG_(delete_bb
)(vge
.base
[0]);
1387 /*------------------------------------------------------------*/
1388 /*--- CLG_(fini)() and related function ---*/
1389 /*------------------------------------------------------------*/
1393 static void zero_thread_cost(thread_info
* t
)
1397 for(i
= 0; i
< CLG_(current_call_stack
).sp
; i
++) {
1398 if (!CLG_(current_call_stack
).entry
[i
].jcc
) continue;
1400 /* reset call counters to current for active calls */
1401 CLG_(copy_cost
)( CLG_(sets
).full
,
1402 CLG_(current_call_stack
).entry
[i
].enter_cost
,
1403 CLG_(current_state
).cost
);
1404 CLG_(current_call_stack
).entry
[i
].jcc
->call_counter
= 0;
1407 CLG_(forall_bbccs
)(CLG_(zero_bbcc
));
1409 /* set counter for last dump */
1410 CLG_(copy_cost
)( CLG_(sets
).full
,
1411 t
->lastdump_cost
, CLG_(current_state
).cost
);
1414 void CLG_(zero_all_cost
)(Bool only_current_thread
)
1416 if (VG_(clo_verbosity
) > 1)
1417 VG_(message
)(Vg_DebugMsg
, " Zeroing costs...\n");
1419 if (only_current_thread
)
1420 zero_thread_cost(CLG_(get_current_thread
)());
1422 CLG_(forall_threads
)(zero_thread_cost
);
1424 if (VG_(clo_verbosity
) > 1)
1425 VG_(message
)(Vg_DebugMsg
, " ...done\n");
1429 void unwind_thread(thread_info
* t
)
1431 /* unwind signal handlers */
1432 while(CLG_(current_state
).sig
!=0)
1433 CLG_(post_signal
)(CLG_(current_tid
),CLG_(current_state
).sig
);
1435 /* unwind regular call stack */
1436 while(CLG_(current_call_stack
).sp
>0)
1437 CLG_(pop_call_stack
)();
1439 /* reset context and function stack for context generation */
1440 CLG_(init_exec_state
)( &CLG_(current_state
) );
1441 CLG_(current_fn_stack
).top
= CLG_(current_fn_stack
).bottom
;
1445 void zero_state_cost(thread_info
* t
)
1447 CLG_(zero_cost
)( CLG_(sets
).full
, CLG_(current_state
).cost
);
1450 void CLG_(set_instrument_state
)(const HChar
* reason
, Bool state
)
1452 if (CLG_(instrument_state
) == state
) {
1453 CLG_DEBUG(2, "%s: instrumentation already %s\n",
1454 reason
, state
? "ON" : "OFF");
1457 CLG_(instrument_state
) = state
;
1458 CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n",
1459 reason
, state
? "ON" : "OFF");
1461 VG_(discard_translations_safely
)( (Addr
)0x1000, ~(SizeT
)0xfff, "callgrind");
1463 /* reset internal state: call stacks, simulator */
1464 CLG_(forall_threads
)(unwind_thread
);
1465 CLG_(forall_threads
)(zero_state_cost
);
1466 (*CLG_(cachesim
).clear
)();
1468 if (VG_(clo_verbosity
) > 1)
1469 VG_(message
)(Vg_DebugMsg
, "%s: instrumentation switched %s\n",
1470 reason
, state
? "ON" : "OFF");
1473 /* helper for dump_state_togdb */
1474 static void dump_state_of_thread_togdb(thread_info
* ti
)
1476 static FullCost sum
= 0, tmp
= 0;
1482 t
= CLG_(current_tid
);
1483 CLG_(init_cost_lz
)( CLG_(sets
).full
, &sum
);
1484 CLG_(copy_cost_lz
)( CLG_(sets
).full
, &tmp
, ti
->lastdump_cost
);
1485 CLG_(add_diff_cost
)( CLG_(sets
).full
, sum
, ti
->lastdump_cost
,
1486 ti
->states
.entry
[0]->cost
);
1487 CLG_(copy_cost
)( CLG_(sets
).full
, ti
->lastdump_cost
, tmp
);
1488 mcost
= CLG_(mappingcost_as_string
)(CLG_(dumpmap
), sum
);
1489 VG_(gdb_printf
)("events-%d: %s\n", t
, mcost
);
1491 VG_(gdb_printf
)("frames-%d: %d\n", t
, CLG_(current_call_stack
).sp
);
1494 for(i
= 0; i
< CLG_(current_call_stack
).sp
; i
++) {
1495 ce
= CLG_(get_call_entry
)(i
);
1496 /* if this frame is skipped, we don't have counters */
1497 if (!ce
->jcc
) continue;
1499 from
= ce
->jcc
->from
;
1500 VG_(gdb_printf
)("function-%d-%d: %s\n",t
, i
, from
->cxt
->fn
[0]->name
);
1501 VG_(gdb_printf
)("calls-%d-%d: %llu\n",t
, i
, ce
->jcc
->call_counter
);
1503 /* FIXME: EventSets! */
1504 CLG_(copy_cost
)( CLG_(sets
).full
, sum
, ce
->jcc
->cost
);
1505 CLG_(copy_cost
)( CLG_(sets
).full
, tmp
, ce
->enter_cost
);
1506 CLG_(add_diff_cost
)( CLG_(sets
).full
, sum
,
1507 ce
->enter_cost
, CLG_(current_state
).cost
);
1508 CLG_(copy_cost
)( CLG_(sets
).full
, ce
->enter_cost
, tmp
);
1510 mcost
= CLG_(mappingcost_as_string
)(CLG_(dumpmap
), sum
);
1511 VG_(gdb_printf
)("events-%d-%d: %s\n",t
, i
, mcost
);
1514 if (ce
&& ce
->jcc
) {
1516 VG_(gdb_printf
)("function-%d-%d: %s\n",t
, i
, to
->cxt
->fn
[0]->name
);
1520 /* Dump current state */
1521 static void dump_state_togdb(void)
1525 Int orig_tid
= CLG_(current_tid
);
1527 VG_(gdb_printf
)("instrumentation: %s\n",
1528 CLG_(instrument_state
) ? "on":"off");
1529 if (!CLG_(instrument_state
)) return;
1531 VG_(gdb_printf
)("executed-bbs: %llu\n", CLG_(stat
).bb_executions
);
1532 VG_(gdb_printf
)("executed-calls: %llu\n", CLG_(stat
).call_counter
);
1533 VG_(gdb_printf
)("distinct-bbs: %d\n", CLG_(stat
).distinct_bbs
);
1534 VG_(gdb_printf
)("distinct-calls: %d\n", CLG_(stat
).distinct_jccs
);
1535 VG_(gdb_printf
)("distinct-functions: %d\n", CLG_(stat
).distinct_fns
);
1536 VG_(gdb_printf
)("distinct-contexts: %d\n", CLG_(stat
).distinct_contexts
);
1538 /* "events:" line. Given here because it will be dynamic in the future */
1539 HChar
*evmap
= CLG_(eventmapping_as_string
)(CLG_(dumpmap
));
1540 VG_(gdb_printf
)("events: %s\n", evmap
);
1542 /* "part:" line (number of last part. Is 0 at start */
1543 VG_(gdb_printf
)("part: %d\n", CLG_(get_dump_counter
)());
1546 th
= CLG_(get_threads
)();
1547 VG_(gdb_printf
)("threads:");
1548 for(t
=1;t
<VG_N_THREADS
;t
++) {
1549 if (!th
[t
]) continue;
1550 VG_(gdb_printf
)(" %d", t
);
1552 VG_(gdb_printf
)("\n");
1553 VG_(gdb_printf
)("current-tid: %d\n", orig_tid
);
1554 CLG_(forall_threads
)(dump_state_of_thread_togdb
);
1558 static void print_monitor_help ( void )
1560 VG_(gdb_printf
) ("\n");
1561 VG_(gdb_printf
) ("callgrind monitor commands:\n");
1562 VG_(gdb_printf
) (" dump [<dump_hint>]\n");
1563 VG_(gdb_printf
) (" dump counters\n");
1564 VG_(gdb_printf
) (" zero\n");
1565 VG_(gdb_printf
) (" zero counters\n");
1566 VG_(gdb_printf
) (" status\n");
1567 VG_(gdb_printf
) (" print status\n");
1568 VG_(gdb_printf
) (" instrumentation [on|off]\n");
1569 VG_(gdb_printf
) (" get/set (if on/off given) instrumentation state\n");
1570 VG_(gdb_printf
) ("\n");
1573 /* return True if request recognised, False otherwise */
1574 static Bool
handle_gdb_monitor_command (ThreadId tid
, const HChar
*req
)
1577 HChar s
[VG_(strlen
)(req
) + 1]; /* copy for strtok_r */
1580 VG_(strcpy
) (s
, req
);
1582 wcmd
= VG_(strtok_r
) (s
, " ", &ssaveptr
);
1583 switch (VG_(keyword_id
) ("help dump zero status instrumentation",
1584 wcmd
, kwd_report_duplicated_matches
)) {
1585 case -2: /* multiple matches */
1587 case -1: /* not found */
1590 print_monitor_help();
1592 case 1: { /* dump */
1593 CLG_(dump_profile
)(req
, False
);
1596 case 2: { /* zero */
1597 CLG_(zero_all_cost
)(False
);
1601 case 3: { /* status */
1602 HChar
* arg
= VG_(strtok_r
) (0, " ", &ssaveptr
);
1603 if (arg
&& (VG_(strcmp
)(arg
, "internal") == 0)) {
1604 /* internal interface to callgrind_control */
1609 if (!CLG_(instrument_state
)) {
1610 VG_(gdb_printf
)("No status available as instrumentation is switched off\n");
1612 // Status information to be improved ...
1613 thread_info
** th
= CLG_(get_threads
)();
1615 for(t
=1;t
<VG_N_THREADS
;t
++)
1616 if (th
[t
]) tcount
++;
1617 VG_(gdb_printf
)("%d thread(s) running.\n", tcount
);
1622 case 4: { /* instrumentation */
1623 HChar
* arg
= VG_(strtok_r
) (0, " ", &ssaveptr
);
1625 VG_(gdb_printf
)("instrumentation: %s\n",
1626 CLG_(instrument_state
) ? "on":"off");
1629 CLG_(set_instrument_state
)("Command", VG_(strcmp
)(arg
,"off")!=0);
1640 Bool
CLG_(handle_client_request
)(ThreadId tid
, UWord
*args
, UWord
*ret
)
1642 if (!VG_IS_TOOL_USERREQ('C','T',args
[0])
1643 && VG_USERREQ__GDB_MONITOR_COMMAND
!= args
[0])
1647 case VG_USERREQ__DUMP_STATS
:
1648 CLG_(dump_profile
)("Client Request", True
);
1649 *ret
= 0; /* meaningless */
1652 case VG_USERREQ__DUMP_STATS_AT
:
1654 const HChar
*arg
= (HChar
*)args
[1];
1655 HChar buf
[30 + VG_(strlen
)(arg
)]; // large enough
1656 VG_(sprintf
)(buf
,"Client Request: %s", arg
);
1657 CLG_(dump_profile
)(buf
, True
);
1658 *ret
= 0; /* meaningless */
1662 case VG_USERREQ__ZERO_STATS
:
1663 CLG_(zero_all_cost
)(True
);
1664 *ret
= 0; /* meaningless */
1667 case VG_USERREQ__TOGGLE_COLLECT
:
1668 CLG_(current_state
).collect
= !CLG_(current_state
).collect
;
1669 CLG_DEBUG(2, "Client Request: toggled collection state to %s\n",
1670 CLG_(current_state
).collect
? "ON" : "OFF");
1671 *ret
= 0; /* meaningless */
1674 case VG_USERREQ__START_INSTRUMENTATION
:
1675 CLG_(set_instrument_state
)("Client Request", True
);
1676 *ret
= 0; /* meaningless */
1679 case VG_USERREQ__STOP_INSTRUMENTATION
:
1680 CLG_(set_instrument_state
)("Client Request", False
);
1681 *ret
= 0; /* meaningless */
1684 case VG_USERREQ__GDB_MONITOR_COMMAND
: {
1685 Bool handled
= handle_gdb_monitor_command (tid
, (HChar
*)args
[1]);
/* Syscall timing.
   syscalltime[tid] holds the time at which thread tid last started a
   syscall; syscallcputime[tid] holds the matching CPU time. Both are
   indexed by thread id.
   Whichever clock is used to measure the syscall time, values are stored
   converted to seconds and nanoseconds. */
struct vki_timespec *syscalltime;
struct vki_timespec *syscallcputime;
1711 void collect_time (struct vki_timespec
*systime
, struct vki_timespec
*syscputime
)
1713 switch (CLG_(clo
).collect_systime
) {
1714 case systime_no
: tl_assert (0);
1715 case systime_msec
: {
1716 UInt ms_timer
= VG_(read_millisecond_timer
)();
1717 systime
->tv_sec
= ms_timer
/ 1000;
1718 systime
->tv_nsec
= (ms_timer
% 1000) * 1000000L;
1721 case systime_usec
: {
1722 struct vki_timeval tv_now
;
1723 VG_(gettimeofday
)(&tv_now
, NULL
);
1724 systime
->tv_sec
= tv_now
.tv_sec
;
1725 systime
->tv_nsec
= tv_now
.tv_usec
* 1000;
1729 # if defined(VGO_linux) || defined(VGO_solaris)
1730 VG_(clock_gettime
)(systime
, VKI_CLOCK_MONOTONIC
);
1731 VG_(clock_gettime
)(syscputime
, VKI_CLOCK_THREAD_CPUTIME_ID
);
1733 # elif defined(VGO_darwin)
1736 # error "Unknown OS"
1743 void CLG_(pre_syscalltime
)(ThreadId tid
, UInt syscallno
,
1744 UWord
* args
, UInt nArgs
)
1746 collect_time(&syscalltime
[tid
],
1747 CLG_(clo
).collect_systime
== systime_nsec
? &syscallcputime
[tid
] : NULL
);
1750 /* Returns "after - before" in the unit as specified by --collect-systime.
1751 after is supposed to be >= before, and tv_nsec must be >= 0 and < One_Second_In_Nsec. */
1753 ULong
vki_timespec_diff (struct vki_timespec after
, struct vki_timespec before
)
1755 vki_time_t diff_sec
= after
.tv_sec
- before
.tv_sec
;
1756 long diff_nsec
= after
.tv_nsec
- before
.tv_nsec
;
1757 ULong nsec_factor
; // factor to convert the desired unit into nsec.
1759 if (diff_nsec
< 0) {
1761 diff_nsec
+= 1000000000ULL;
1763 switch (CLG_(clo
).collect_systime
) {
1764 case systime_no
: tl_assert (0);
1765 case systime_msec
: nsec_factor
= 1000000ULL; break;
1766 case systime_usec
: nsec_factor
= 1000ULL; break;
1767 case systime_nsec
: nsec_factor
= 1ULL; break;
1768 default: tl_assert(0);
1770 return ((ULong
) diff_sec
* 1000000000ULL + diff_nsec
) / nsec_factor
;
1774 void CLG_(post_syscalltime
)(ThreadId tid
, UInt syscallno
,
1775 UWord
* args
, UInt nArgs
, SysRes res
)
1777 if (CLG_(current_state
).bbcc
) {
1779 struct vki_timespec ts_now
;
1780 struct vki_timespec ts_cpunow
;
1783 collect_time(&ts_now
,
1784 CLG_(clo
).collect_systime
== systime_nsec
? &ts_cpunow
: NULL
);
1786 diff
= vki_timespec_diff (ts_now
, syscalltime
[tid
]);
1788 /* offset o is for "SysCount", o+1 for "SysTime",
1789 o+2 is (optionally) "SysCpuTime". */
1790 o
= fullOffset(EG_SYS
);
1792 CLG_DEBUG(0," Time (Off %d) for Syscall %u: %llu\n", o
, syscallno
,
1795 if (!CLG_(current_state
).bbcc
->skipped
)
1796 CLG_(init_cost_lz
)(CLG_(sets
).full
,
1797 &(CLG_(current_state
).bbcc
->skipped
));
1798 CLG_(current_state
).cost
[o
] ++;
1799 CLG_(current_state
).cost
[o
+1] += diff
;
1800 CLG_(current_state
).bbcc
->skipped
[o
] ++;
1801 CLG_(current_state
).bbcc
->skipped
[o
+1] += diff
;
1802 if (CLG_(clo
).collect_systime
== systime_nsec
) {
1803 diff
= vki_timespec_diff (ts_cpunow
, syscallcputime
[tid
]);
1804 CLG_DEBUG(0," SysCpuTime (Off %d) for Syscall %u: %llu\n", o
+2, syscallno
,
1806 CLG_(current_state
).cost
[o
+2] += diff
;
1807 CLG_(current_state
).bbcc
->skipped
[o
+2] += diff
;
1812 static UInt
ULong_width(ULong n
)
1820 return w
+ (w
-1)/3; // add space for commas
1824 void branchsim_printstat(int l1
, int l2
, int l3
)
1826 static HChar fmt
[128]; // large enough
1828 ULong Bc_total_b
, Bc_total_mp
, Bi_total_b
, Bi_total_mp
;
1829 ULong B_total_b
, B_total_mp
;
1831 total
= CLG_(total_cost
);
1832 Bc_total_b
= total
[ fullOffset(EG_BC
) ];
1833 Bc_total_mp
= total
[ fullOffset(EG_BC
)+1 ];
1834 Bi_total_b
= total
[ fullOffset(EG_BI
) ];
1835 Bi_total_mp
= total
[ fullOffset(EG_BI
)+1 ];
1837 /* Make format string, getting width right for numbers */
1838 VG_(sprintf
)(fmt
, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
1841 if (0 == Bc_total_b
) Bc_total_b
= 1;
1842 if (0 == Bi_total_b
) Bi_total_b
= 1;
1843 B_total_b
= Bc_total_b
+ Bi_total_b
;
1844 B_total_mp
= Bc_total_mp
+ Bi_total_mp
;
1847 VG_(umsg
)(fmt
, "Branches: ",
1848 B_total_b
, Bc_total_b
, Bi_total_b
);
1850 VG_(umsg
)(fmt
, "Mispredicts: ",
1851 B_total_mp
, Bc_total_mp
, Bi_total_mp
);
1853 VG_(umsg
)("Mispred rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1854 l1
, B_total_mp
* 100.0 / B_total_b
,
1855 l2
, Bc_total_mp
* 100.0 / Bc_total_b
,
1856 l3
, Bi_total_mp
* 100.0 / Bi_total_b
);
1860 void clg_print_stats(void)
1863 CLG_(stat
).full_debug_BBs
+
1864 CLG_(stat
).fn_name_debug_BBs
+
1865 CLG_(stat
).file_line_debug_BBs
+
1866 CLG_(stat
).no_debug_BBs
;
1868 /* Hash table stats */
1869 VG_(message
)(Vg_DebugMsg
, "Distinct objects: %d\n",
1870 CLG_(stat
).distinct_objs
);
1871 VG_(message
)(Vg_DebugMsg
, "Distinct files: %d\n",
1872 CLG_(stat
).distinct_files
);
1873 VG_(message
)(Vg_DebugMsg
, "Distinct fns: %d\n",
1874 CLG_(stat
).distinct_fns
);
1875 VG_(message
)(Vg_DebugMsg
, "Distinct contexts:%d\n",
1876 CLG_(stat
).distinct_contexts
);
1877 VG_(message
)(Vg_DebugMsg
, "Distinct BBs: %d\n",
1878 CLG_(stat
).distinct_bbs
);
1879 VG_(message
)(Vg_DebugMsg
, "Cost entries: %u (Chunks %u)\n",
1880 CLG_(costarray_entries
), CLG_(costarray_chunks
));
1881 VG_(message
)(Vg_DebugMsg
, "Distinct BBCCs: %d\n",
1882 CLG_(stat
).distinct_bbccs
);
1883 VG_(message
)(Vg_DebugMsg
, "Distinct JCCs: %d\n",
1884 CLG_(stat
).distinct_jccs
);
1885 VG_(message
)(Vg_DebugMsg
, "Distinct skips: %d\n",
1886 CLG_(stat
).distinct_skips
);
1887 VG_(message
)(Vg_DebugMsg
, "BB lookups: %d\n",
1890 VG_(message
)(Vg_DebugMsg
, "With full debug info:%3d%% (%d)\n",
1891 CLG_(stat
).full_debug_BBs
* 100 / BB_lookups
,
1892 CLG_(stat
).full_debug_BBs
);
1893 VG_(message
)(Vg_DebugMsg
, "With file/line debug info:%3d%% (%d)\n",
1894 CLG_(stat
).file_line_debug_BBs
* 100 / BB_lookups
,
1895 CLG_(stat
).file_line_debug_BBs
);
1896 VG_(message
)(Vg_DebugMsg
, "With fn name debug info:%3d%% (%d)\n",
1897 CLG_(stat
).fn_name_debug_BBs
* 100 / BB_lookups
,
1898 CLG_(stat
).fn_name_debug_BBs
);
1899 VG_(message
)(Vg_DebugMsg
, "With no debug info:%3d%% (%d)\n",
1900 CLG_(stat
).no_debug_BBs
* 100 / BB_lookups
,
1901 CLG_(stat
).no_debug_BBs
);
1903 VG_(message
)(Vg_DebugMsg
, "BBCC Clones: %d\n",
1904 CLG_(stat
).bbcc_clones
);
1905 VG_(message
)(Vg_DebugMsg
, "BBs Retranslated: %d\n",
1906 CLG_(stat
).bb_retranslations
);
1907 VG_(message
)(Vg_DebugMsg
, "Distinct instrs: %d\n",
1908 CLG_(stat
).distinct_instrs
);
1910 VG_(message
)(Vg_DebugMsg
, "LRU Contxt Misses: %d\n",
1911 CLG_(stat
).cxt_lru_misses
);
1912 VG_(message
)(Vg_DebugMsg
, "LRU BBCC Misses: %d\n",
1913 CLG_(stat
).bbcc_lru_misses
);
1914 VG_(message
)(Vg_DebugMsg
, "LRU JCC Misses: %d\n",
1915 CLG_(stat
).jcc_lru_misses
);
1916 VG_(message
)(Vg_DebugMsg
, "BBs Executed: %llu\n",
1917 CLG_(stat
).bb_executions
);
1918 VG_(message
)(Vg_DebugMsg
, "Calls: %llu\n",
1919 CLG_(stat
).call_counter
);
1920 VG_(message
)(Vg_DebugMsg
, "CondJMP followed: %llu\n",
1921 CLG_(stat
).jcnd_counter
);
1922 VG_(message
)(Vg_DebugMsg
, "Boring JMPs: %llu\n",
1923 CLG_(stat
).jump_counter
);
1924 VG_(message
)(Vg_DebugMsg
, "Recursive calls: %llu\n",
1925 CLG_(stat
).rec_call_counter
);
1926 VG_(message
)(Vg_DebugMsg
, "Returns: %llu\n",
1927 CLG_(stat
).ret_counter
);
1934 HChar fmt
[128]; // large enough
1938 CLG_DEBUG(0, "finish()\n");
1940 (*CLG_(cachesim
).finish
)();
1942 /* pop all remaining items from CallStack for correct sum
1944 CLG_(forall_threads
)(unwind_thread
);
1946 CLG_(dump_profile
)(0, False
);
1948 if (VG_(clo_verbosity
) == 0) return;
1950 if (VG_(clo_stats
)) {
1951 VG_(message
)(Vg_DebugMsg
, "\n");
1953 VG_(message
)(Vg_DebugMsg
, "\n");
1956 HChar
*evmap
= CLG_(eventmapping_as_string
)(CLG_(dumpmap
));
1957 VG_(message
)(Vg_UserMsg
, "Events : %s\n", evmap
);
1959 HChar
*mcost
= CLG_(mappingcost_as_string
)(CLG_(dumpmap
), CLG_(total_cost
));
1960 VG_(message
)(Vg_UserMsg
, "Collected : %s\n", mcost
);
1962 VG_(message
)(Vg_UserMsg
, "\n");
1964 /* determine value widths for statistics */
1965 total
= CLG_(total_cost
);
1966 l1
= ULong_width( total
[fullOffset(EG_IR
)] );
1968 if (CLG_(clo
).simulate_cache
) {
1969 l2
= ULong_width( total
[fullOffset(EG_DR
)] );
1970 l3
= ULong_width( total
[fullOffset(EG_DW
)] );
1972 if (CLG_(clo
).simulate_branch
) {
1973 int l2b
= ULong_width( total
[fullOffset(EG_BC
)] );
1974 int l3b
= ULong_width( total
[fullOffset(EG_BI
)] );
1975 if (l2b
> l2
) l2
= l2b
;
1976 if (l3b
> l3
) l3
= l3b
;
1979 /* Make format string, getting width right for numbers */
1980 VG_(sprintf
)(fmt
, "%%s %%,%dllu\n", l1
);
1982 /* Always print this */
1983 VG_(umsg
)(fmt
, "I refs: ", total
[fullOffset(EG_IR
)] );
1985 if (CLG_(clo
).simulate_cache
)
1986 (*CLG_(cachesim
).printstat
)(l1
, l2
, l3
);
1988 if (CLG_(clo
).simulate_branch
)
1989 branchsim_printstat(l1
, l2
, l3
);
1994 void CLG_(fini
)(Int exitcode
)
2000 /*--------------------------------------------------------------------*/
2002 /*--------------------------------------------------------------------*/
2004 static void clg_start_client_code_callback ( ThreadId tid
, ULong blocks_done
)
2006 static ULong last_blocks_done
= 0;
2009 VG_(printf
)("%d R %llu\n", (Int
)tid
, blocks_done
);
2011 /* throttle calls to CLG_(run_thread) by number of BBs executed */
2012 if (blocks_done
- last_blocks_done
< 5000) return;
2013 last_blocks_done
= blocks_done
;
2015 CLG_(run_thread
)( tid
);
2019 void CLG_(post_clo_init
)(void)
2021 if (VG_(clo_vex_control
).iropt_register_updates_default
2022 != VexRegUpdSpAtMemAccess
) {
2023 CLG_DEBUG(1, " Using user specified value for "
2024 "--vex-iropt-register-updates\n");
2027 " Using default --vex-iropt-register-updates="
2028 "sp-at-mem-access\n");
2031 if (CLG_(clo
).collect_systime
!= systime_no
) {
2032 VG_(needs_syscall_wrapper
)(CLG_(pre_syscalltime
),
2033 CLG_(post_syscalltime
));
2034 syscalltime
= CLG_MALLOC("cl.main.pci.1",
2035 VG_N_THREADS
* sizeof syscalltime
[0]);
2036 for (UInt i
= 0; i
< VG_N_THREADS
; ++i
) {
2037 syscalltime
[i
].tv_sec
= 0;
2038 syscalltime
[i
].tv_nsec
= 0;
2040 if (CLG_(clo
).collect_systime
== systime_nsec
) {
2041 syscallcputime
= CLG_MALLOC("cl.main.pci.2",
2042 VG_N_THREADS
* sizeof syscallcputime
[0]);
2043 for (UInt i
= 0; i
< VG_N_THREADS
; ++i
) {
2044 syscallcputime
[i
].tv_sec
= 0;
2045 syscallcputime
[i
].tv_nsec
= 0;
2050 if (VG_(clo_px_file_backed
) != VexRegUpdSpAtMemAccess
) {
2051 CLG_DEBUG(1, " Using user specified value for "
2052 "--px-file-backed\n");
2055 " Using default --px-file-backed="
2056 "sp-at-mem-access\n");
2059 if (VG_(clo_vex_control
).iropt_unroll_thresh
!= 0) {
2060 VG_(message
)(Vg_UserMsg
,
2061 "callgrind only works with --vex-iropt-unroll-thresh=0\n"
2062 "=> resetting it back to 0\n");
2063 VG_(clo_vex_control
).iropt_unroll_thresh
= 0; // cannot be overridden.
2065 if (VG_(clo_vex_control
).guest_chase
) {
2066 VG_(message
)(Vg_UserMsg
,
2067 "callgrind only works with --vex-guest-chase=no\n"
2068 "=> resetting it back to 'no'\n");
2069 VG_(clo_vex_control
).guest_chase
= False
; // cannot be overridden.
2072 CLG_DEBUG(1, " dump threads: %s\n", CLG_(clo
).separate_threads
? "Yes":"No");
2073 CLG_DEBUG(1, " call sep. : %d\n", CLG_(clo
).separate_callers
);
2074 CLG_DEBUG(1, " rec. sep. : %d\n", CLG_(clo
).separate_recursions
);
2076 if (!CLG_(clo
).dump_line
&& !CLG_(clo
).dump_instr
&& !CLG_(clo
).dump_bb
) {
2077 VG_(message
)(Vg_UserMsg
, "Using source line as position.\n");
2078 CLG_(clo
).dump_line
= True
;
2083 (*CLG_(cachesim
).post_clo_init
)();
2085 CLG_(init_eventsets
)();
2086 CLG_(init_statistics
)(& CLG_(stat
));
2087 CLG_(init_cost_lz
)( CLG_(sets
).full
, &CLG_(total_cost
) );
2089 /* initialize hash tables */
2090 CLG_(init_obj_table
)();
2091 CLG_(init_cxt_table
)();
2092 CLG_(init_bb_hash
)();
2094 CLG_(init_threads
)();
2095 CLG_(run_thread
)(1);
2097 CLG_(instrument_state
) = CLG_(clo
).instrument_atstart
;
2099 if (VG_(clo_verbosity
) > 0) {
2100 VG_(message
)(Vg_UserMsg
,
2101 "For interactive control, run 'callgrind_control%s%s -h'.\n",
2102 (VG_(arg_vgdb_prefix
) ? " " : ""),
2103 (VG_(arg_vgdb_prefix
) ? VG_(arg_vgdb_prefix
) : ""));
2108 void CLG_(pre_clo_init
)(void)
2110 VG_(details_name
) ("Callgrind");
2111 VG_(details_version
) (NULL
);
2112 VG_(details_description
) ("a call-graph generating cache profiler");
2113 VG_(details_copyright_author
)("Copyright (C) 2002-2017, and GNU GPL'd, "
2114 "by Josef Weidendorfer et al.");
2115 VG_(details_bug_reports_to
) (VG_BUGS_TO
);
2116 VG_(details_avg_translation_sizeB
) ( 500 );
2118 VG_(clo_vex_control
).iropt_register_updates_default
2119 = VG_(clo_px_file_backed
)
2120 = VexRegUpdSpAtMemAccess
; // overridable by the user.
2122 VG_(clo_vex_control
).iropt_unroll_thresh
= 0; // cannot be overridden.
2123 VG_(clo_vex_control
).guest_chase
= False
; // cannot be overridden.
2125 VG_(basic_tool_funcs
) (CLG_(post_clo_init
),
2129 VG_(needs_superblock_discards
)(clg_discard_superblock_info
);
2132 VG_(needs_command_line_options
)(CLG_(process_cmd_line_option
),
2134 CLG_(print_debug_usage
));
2136 VG_(needs_client_requests
)(CLG_(handle_client_request
));
2137 VG_(needs_print_stats
) (clg_print_stats
);
2139 VG_(track_start_client_code
) ( & clg_start_client_code_callback
);
2140 VG_(track_pre_deliver_signal
) ( & CLG_(pre_signal
) );
2141 VG_(track_post_deliver_signal
)( & CLG_(post_signal
) );
2143 CLG_(set_clo_defaults
)();
2147 VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init
))
2149 /*--------------------------------------------------------------------*/
2150 /*--- end main.c ---*/
2151 /*--------------------------------------------------------------------*/