/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache.               ---*/
/*---                                                 m_transtab.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2013 Julian Seward

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
#include "pub_core_libcbase.h"
#include "pub_core_vki.h"        // to keep pub_core_libcproc.h happy, sigh
#include "pub_core_libcproc.h"   // VG_(invalidate_icache)
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
#include "pub_core_transtab.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
#include "pub_core_xarray.h"
#include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses


#define DEBUG_TRANSTAB 0
/*-------------------------------------------------------------*/
/*--- Management of the FIFO-based translation table+cache. ---*/
/*-------------------------------------------------------------*/

/* Number of sectors provided via command line parameter. */
UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT;

/* Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */
static UInt n_sectors = 0;
/*------------------ CONSTANTS ------------------*/

/* Number of TC entries in each sector.  This needs to be a prime
   number to work properly; it must be <= 65535 (so that a TT index
   fits in a UShort, leaving room for 0xFFFF (EC2TTE_DELETED) to denote
   'deleted'), and it is strongly recommended not to change this.
   65521 is the largest prime <= 65535. */
#define N_TTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 65

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR_USABLE \
           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
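
/* For concreteness (a note added for exposition): with the values
   above, N_TTES_PER_SECTOR_USABLE works out to (65521 * 65) / 100
   == 42588 entries, i.e. a sector's translation table is declared
   full at roughly 65% loading. */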
/* Equivalence classes for fast address range deletion.  There are 1 +
   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
   address range which does not fall cleanly within any specific bin.
   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
#define ECLASS_SHIFT 11
#define ECLASS_WIDTH 8
#define ECLASS_MISC  (1 << ECLASS_WIDTH)
#define ECLASS_N     (1 + ECLASS_MISC)

#define EC2TTE_DELETED 0xFFFF /* 16-bit special value */
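
/* A worked example of the binning scheme (a note added for
   exposition): with ECLASS_SHIFT == 11 and ECLASS_WIDTH == 8, a bin is
   selected by bits [18:11] of an address, so each bin covers an
   aligned 2KB chunk and the bin pattern repeats every 512KB.  The
   short range [0x401800, 0x4018FF] has (0x401800 >> 11) & 0xFF == 0x03
   for both its first and last byte, so it falls cleanly in bin 3,
   whereas a short range straddling a 2KB boundary gets differing
   lo/hi bin bits and lands in ECLASS_MISC instead; see
   range_to_eclass below. */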
/*------------------ TYPES ------------------*/

/* In edges ("to-me") in the graph created by chaining. */
typedef
   struct {
      UInt from_sNo;   /* sector number */
      UInt from_tteNo; /* TTE number in given sector */
      UInt from_offs;  /* code offset from TCEntry::tcptr where the patch is */
      Bool to_fastEP;  /* Is the patch to a fast or slow entry point? */
   }
   InEdge;


/* Out edges ("from-me") in the graph created by chaining. */
typedef
   struct {
      UInt to_sNo;    /* sector number */
      UInt to_tteNo;  /* TTE number in given sector */
      UInt from_offs; /* code offset in owning translation where patch is */
   }
   OutEdge;


#define N_FIXED_IN_EDGE_ARR 3
typedef
   struct {
      UInt    n_fixed; /* 0 .. N_FIXED_IN_EDGE_ARR */
      InEdge  fixed[N_FIXED_IN_EDGE_ARR];
      XArray* var; /* XArray* of InEdge */
   }
   InEdgeArr;

#define N_FIXED_OUT_EDGE_ARR 2
typedef
   struct {
      UInt     n_fixed; /* 0 .. N_FIXED_OUT_EDGE_ARR */
      OutEdge  fixed[N_FIXED_OUT_EDGE_ARR];
      XArray*  var; /* XArray* of OutEdge */
   }
   OutEdgeArr;
/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too. */
typedef
   struct {
      /* Profiling only: the count and weight (arbitrary meaning) for
         this translation.  Weight is a property of the translation
         itself and computed once when the translation is created.
         Count is an entry count for the translation and is
         incremented by 1 every time the translation is used, if we
         are profiling the code. */
      ULong    count;
      UShort   weight;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr64 entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;
      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same. */
      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
      UShort tte2ec_ec[3];  // for each, the eclass #
      UInt   tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.

      /* Admin information for chaining.  'in_edges' is a set of the
         patch points which jump to this translation -- hence are
         predecessors in the control flow graph.  'out_edges' points
         to successors in the control flow graph -- translations to
         which this one has a patched jump.  In short these are just
         backwards and forwards edges in the graph of patched-together
         blocks.  The 'in_edges' contain slightly more info, enough
         that we can undo the chaining of each mentioned patch point.
         The 'out_edges' list exists only so that we can visit the
         'in_edges' entries of all blocks we're patched through to, in
         order to remove ourselves from them when we're deleted. */
      /* A translation can disappear for two reasons:
          1. erased (as part of the oldest sector cleanup) when the
             youngest sector is full.
          2. discarded due to calls to VG_(discard_translations).
             VG_(discard_translations) sets the status of the
             translation to 'Deleted'.
             Among others, the gdbserver discards one or more translations
             when a breakpoint is inserted or removed at an Addr,
             or when single stepping mode is enabled/disabled,
             or when a translation is instrumented for gdbserver
             (all the target jumps of this translation are
             invalidated).

         So, it is possible that the translation A to be patched
         (to obtain a patched jump from A to B) is invalidated
         after B is translated and before A is patched.
         In case a translation is erased or discarded, the patching
         cannot be done.  VG_(tt_tc_do_chaining) and
         find_TTEntry_from_hcode check that the 'from' translation
         still exists before doing the patching.

         Is it safe to erase or discard the current translation E being
         executed?  Amazingly, yes, it is safe.
         Here is the explanation:

         The translation E being executed can only be erased if a new
         translation N is being done.  A new translation is done only
         if the host addr is a not yet patched jump to another
         translation.  In such a case, the guest address of N is
         assigned to the PC in the VEX state.  Control is returned
         to the scheduler.  N will be translated.  This can erase the
         translation E (in case of sector full).  VG_(tt_tc_do_chaining)
         will not chain to a translation E that can no longer be found.
         Execution will then simply continue at the current guest PC
         (i.e. the translation N).
         => it is safe to erase the current translation being executed.

         The current translation E being executed can also be discarded
         (e.g. by gdbserver).  VG_(discard_translations) will mark
         this translation E as Deleted, but the translation itself
         is not erased.  In particular, its host code can only
         be overwritten or erased when a new translation is done.
         A new translation will only be done if a not yet translated
         jump is to be executed.  Execution of the Deleted translation
         E will continue until an unpatched jump is encountered.
         This situation is then similar to the 'erasing' case above:
         the current translation E can be erased or overwritten, as
         execution will continue at the new translation N. */

      /* It is possible, although very unlikely, that a block A has
         more than one patched jump to block B.  This could happen if
         (eg) A finishes "jcond B; jmp B".

         This means in turn that B's in_edges set can list A more than
         once (twice in this example).  However, each such entry must
         have a different from_offs, since a patched jump can only
         jump to one place at once (it's meaningless for it to have
         multiple destinations.)  IOW, the successor and predecessor
         edges in the graph are not uniquely determined by a
         TTEntry --> TTEntry pair, but rather by a
         (TTEntry,offset) --> TTEntry triple.

         If A has multiple edges to B then B will mention A multiple
         times in its in_edges.  To make things simpler, we then
         require that A mentions B exactly the same number of times in
         its out_edges.  Furthermore, a matching out-in pair must have
         the same offset (from_offs).  This facilitates sanity
         checking, and it facilitates establishing the invariant that
         an out_edges set may not have duplicates when using the
         equality defined by (TTEntry,offset).  Hence the out_edges
         and in_edges sets really do both have set semantics.

         eg if  A has been patched to B at offsets 42 and 87 (in A)
         then   A.out_edges = { (B,42), (B,87) }   (in any order)
         and    B.in_edges  = { (A,42), (A,87) }   (in any order)

         Hence for each node pair P->Q in the graph, there's a 1:1
         mapping between P.out_edges and Q.in_edges.
      */
      InEdgeArr  in_edges;
      OutEdgeArr out_edges;
   }
   TTEntry;
/* A structure used for mapping host code addresses back to the
   relevant TTEntry.  Used when doing chaining, for finding the
   TTEntry to which some arbitrary patch address belongs. */
typedef
   struct {
      UChar* start;
      UInt   len;
      UInt   tteNo;
   }
   HostExtent;

/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry .count and .weight fields, if required.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int     ec2tte_size[ECLASS_N];
      Int     ec2tte_used[ECLASS_N];
      UShort* ec2tte[ECLASS_N];

      /* The host extents.  The [start, +len) ranges are constructed
         in strictly non-overlapping order, so we can binary search
         them at any time. */
      XArray* host_extents; /* XArray* of HostExtent */
   }
   Sector;
/*------------------ DECLS ------------------*/

/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all N_TC_SECTORS have been brought into use for the
   first time, and are full, we then re-use the oldest sector, and so
   on.

   When running, the youngest sector should be >= 0 and <
   N_TC_SECTORS.  The initial -1 value indicates the TT/TC system is
   not yet initialised. */
static Sector sectors[MAX_N_SECTORS];
static Int    youngest_sector = -1;

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int tc_sector_szQ = 0;


/* A list of sector numbers, in the order which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  -1 denotes "no entry". */
static Int sector_search_order[MAX_N_SECTORS];
/* Fast helper for the TC.  A direct-mapped cache which holds a set of
   recently used (guest address, host address) pairs.  This array is
   referred to directly from m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address -- TRANSTAB_BOGUS_GUEST_ADDR -- must be
   pointed at to cause that cache entry to miss.  This relies on the
   assumption that no guest code actually has that address, hence a
   value of 0x1 seems good.  m_translate gives the client a synthetic
   segfault if it tries to execute at this address.
*/
/*global*/ __attribute__((aligned(16)))
           FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
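
/* For exposition only: a hedged sketch (not part of the original
   sources) of the probe the hand-written dispatcher loop performs on
   this cache, mirrored here in C.  Guarded out so it cannot affect
   the build. */
#if 0
static inline ULong* example_tt_fast_lookup ( Addr64 guest )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(guest);  /* direct-mapped index */
   if (VG_(tt_fast)[cno].guest == (Addr)guest)
      return (ULong*)VG_(tt_fast)[cno].host; /* hit: host code address */
   return NULL; /* miss: fall back to VG_(search_transtab) */
}
#endif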
/* Make sure we're not used before initialisation. */
static Bool init_done = False;


/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
static ULong n_fast_flushes = 0;
static ULong n_fast_updates = 0;

/* Number of full lookups done. */
static ULong n_full_lookups  = 0;
static ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
static ULong n_in_count    = 0;
static ULong n_in_osize    = 0;
static ULong n_in_tsize    = 0;
static ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
static ULong n_dump_count = 0;
static ULong n_dump_osize = 0;

/* Number/osize of translations discarded due to requests to do so. */
static ULong n_disc_count = 0;
static ULong n_disc_osize = 0;
/*-------------------------------------------------------------*/
/*--- Misc                                                  ---*/
/*-------------------------------------------------------------*/

static void* ttaux_malloc ( const HChar* tag, SizeT n )
{
   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
}

static void ttaux_free ( void* p )
{
   VG_(arena_free)(VG_AR_TTAUX, p);
}
/*-------------------------------------------------------------*/
/*--- Chaining support                                      ---*/
/*-------------------------------------------------------------*/

static inline TTEntry* index_tte ( UInt sNo, UInt tteNo )
{
   vg_assert(sNo < n_sectors);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->tt);
   TTEntry* tte = &s->tt[tteNo];
   vg_assert(tte->status == InUse);
   return tte;
}

static void InEdge__init ( InEdge* ie )
{
   ie->from_sNo   = -1; /* invalid */
   ie->from_tteNo = 0;
   ie->from_offs  = 0;
   ie->to_fastEP  = False;
}

static void OutEdge__init ( OutEdge* oe )
{
   oe->to_sNo    = -1; /* invalid */
   oe->to_tteNo  = 0;
   oe->from_offs = 0;
}

static void TTEntry__init ( TTEntry* tte )
{
   VG_(memset)(tte, 0, sizeof(*tte));
}
static UWord InEdgeArr__size ( const InEdgeArr* iea )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      return VG_(sizeXA)(iea->var);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      return iea->n_fixed;
   }
}

static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(deleteXA)(iea->var);
      iea->var = NULL;
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      iea->n_fixed = 0;
   }
}

static
InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      return (InEdge*)VG_(indexXA)(iea->var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      return &iea->fixed[i];
   }
}

static
void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(removeIndexXA)(iea->var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      for (; i+1 < iea->n_fixed; i++) {
         iea->fixed[i] = iea->fixed[i+1];
      }
      iea->n_fixed--;
   }
}
static
void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(addToXA)(iea->var, ie);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         iea->var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
                               ttaux_free, sizeof(InEdge));
         UWord i;
         for (i = 0; i < iea->n_fixed; i++) {
            VG_(addToXA)(iea->var, &iea->fixed[i]);
         }
         iea->n_fixed = 0;
         VG_(addToXA)(iea->var, ie);
      } else {
         /* Just add to the fixed array. */
         iea->fixed[iea->n_fixed++] = *ie;
      }
   }
}
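
/* Design note: InEdgeArr and OutEdgeArr implement a small-vector
   optimisation.  Most blocks have only a couple of predecessors and
   successors, so edges normally live in the inline 'fixed' array and
   no heap allocation happens at all; only when the fixed array
   overflows is an XArray allocated and the contents migrated, after
   which a non-NULL 'var' marks the spilled representation. */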
static UWord OutEdgeArr__size ( const OutEdgeArr* oea )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      return VG_(sizeXA)(oea->var);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      return oea->n_fixed;
   }
}

static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(deleteXA)(oea->var);
      oea->var = NULL;
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      oea->n_fixed = 0;
   }
}

static
OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      return (OutEdge*)VG_(indexXA)(oea->var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      return &oea->fixed[i];
   }
}

static
void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(removeIndexXA)(oea->var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      for (; i+1 < oea->n_fixed; i++) {
         oea->fixed[i] = oea->fixed[i+1];
      }
      oea->n_fixed--;
   }
}

static
void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(addToXA)(oea->var, oe);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         oea->var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
                               ttaux_free, sizeof(OutEdge));
         UWord i;
         for (i = 0; i < oea->n_fixed; i++) {
            VG_(addToXA)(oea->var, &oea->fixed[i]);
         }
         oea->n_fixed = 0;
         VG_(addToXA)(oea->var, oe);
      } else {
         /* Just add to the fixed array. */
         oea->fixed[oea->n_fixed++] = *oe;
      }
   }
}
static Int HostExtent__cmpOrd ( const void* v1, const void* v2 )
{
   const HostExtent* hx1 = v1;
   const HostExtent* hx2 = v2;
   if (hx1->start + hx1->len <= hx2->start) return -1;
   if (hx2->start + hx2->len <= hx1->start) return 1;
   return 0; /* partial overlap */
}
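
/* Note on the comparator above (added for exposition): any two
   extents that overlap at all compare as "equal".  Since the recorded
   extents never overlap each other, probing with a 1-byte key extent
   via VG_(lookupXA_UNSAFE) therefore finds exactly the recorded
   extent containing that byte, if there is one.  This is what
   find_TTEntry_from_hcode below relies on. */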
/* True if hx is a dead host extent, i.e. corresponds to host code
   of an entry that was invalidated. */
static Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec)
{
   const UInt tteNo = hx->tteNo;
#define LDEBUG(m) if (DEBUG_TRANSTAB)                           \
      VG_(printf) (m                                            \
                   " start 0x%p len %u sector %d ttslot %u"     \
                   " tt.entry 0x%llu tt.tcptr 0x%p\n",          \
                   hx->start, hx->len, (int)(sec - sectors),    \
                   tteNo,                                       \
                   sec->tt[tteNo].entry, sec->tt[tteNo].tcptr)

   /* Entry might have been invalidated and not re-used yet. */
   if (sec->tt[tteNo].status == Deleted) {
      LDEBUG("found deleted entry");
      return True;
   }

   /* Maybe we found this entry via a host_extents which was
      inserted for an entry which was changed to Deleted then
      re-used after.  If this entry was re-used, then its tcptr
      is >= the host_extent's start (i.e. the previous tcptr) + len.
      This is the case as there is no re-use of host code: a new
      entry or re-used entry always gets "higher value" host code. */
   if ((UChar*) sec->tt[tteNo].tcptr >= hx->start + hx->len) {
      LDEBUG("found re-used entry");
      return True;
   }

   return False;
#undef LDEBUG
}
static __attribute__((noinline))
Bool find_TTEntry_from_hcode( /*OUT*/UInt* from_sNo,
                              /*OUT*/UInt* from_tteNo,
                              void* hcode )
{
   Int i;

   /* Search order logic copied from VG_(search_transtab). */
   for (i = 0; i < n_sectors; i++) {
      Int sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      const Sector* sec = &sectors[sno];
      const XArray* /* of HostExtent */ host_extents = sec->host_extents;
      vg_assert(host_extents);

      HostExtent key;
      VG_(memset)(&key, 0, sizeof(key));
      key.start = hcode;
      key.len   = 1;
      Word firstW = -1, lastW = -1;
      Bool found  = VG_(lookupXA_UNSAFE)(
                       host_extents, &key, &firstW, &lastW,
                       HostExtent__cmpOrd );
      vg_assert(firstW == lastW); // always true, even if not found
      if (found) {
         HostExtent* hx = VG_(indexXA)(host_extents, firstW);
         UInt tteNo = hx->tteNo;
         /* Do some additional sanity checks. */
         vg_assert(tteNo <= N_TTES_PER_SECTOR);

         /* If this hx entry corresponds to dead host code, we must
            report that this code has not been found, as it cannot be
            patched. */
         if (HostExtent__is_dead (hx, sec))
            return False;

         vg_assert(sec->tt[tteNo].status == InUse);
         /* Can only half check that the found TTEntry contains hcode,
            due to not having a length value for the hcode in the
            TTEntry. */
         vg_assert((UChar*)sec->tt[tteNo].tcptr <= (UChar*)hcode);
         /* Looks plausible */
         *from_sNo   = sno;
         *from_tteNo = (UInt)tteNo;
         return True;
      }
   }
   return False;
}
/* Figure out whether or not hcode is jitted code present in the main
   code cache (but not in the no-redir cache).  Used for sanity
   checking. */
static Bool is_in_the_main_TC ( const void* hcode )
{
   Int i, sno;
   for (i = 0; i < n_sectors; i++) {
      sno = sector_search_order[i];
      if (sno == -1)
         break; /* run out of sectors to search */
      if ((const UChar*)hcode >= (const UChar*)sectors[sno].tc
          && (const UChar*)hcode <= (const UChar*)sectors[sno].tc_next
                                    + sizeof(ULong) - 1)
         return True;
   }
   return False;
}
/* Fulfill a chaining request, and record admin info so we
   can undo it later, if required.
*/
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              UInt  to_sNo,
                              UInt  to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch     arch_host = VexArch_INVALID;
   VexArchInfo archinfo_host;
   VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   VexEndness endness_host = archinfo_host.endness;

   // host_code is where we're patching to.  So it needs to
   // take into account, whether we're jumping to the slow
   // or fast entry point.  By definition, the fast entry point
   // is exactly one event check's worth of code along from
   // the slow (tcptr) entry point.
   TTEntry* to_tte    = index_tte(to_sNo, to_tteNo);
   void*    host_code = ((UChar*)to_tte->tcptr)
                        + (to_fastEP ? LibVEX_evCheckSzB(arch_host,
                                                         endness_host) : 0);

   // stay sane -- the patch point (dst) is in this sector's code cache
   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
                                   + sizeof(ULong) - 1 );

   /* Find the TTEntry for the from__ code.  This isn't simple since
      we only know the patch address, which is going to be somewhere
      inside the from_ block. */
   UInt from_sNo   = (UInt)-1;
   UInt from_tteNo = (UInt)-1;
   Bool from_found
      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
                                 from__patch_addr );
   if (!from_found) {
      // The from code might have been discarded due to sector re-use
      // or marked Deleted due to translation invalidation.
      // In such a case, don't do the chaining.
      VG_(debugLog)(1,"transtab",
                    "host code %p not found (discarded? sector recycled?)"
                    " => no chaining done\n",
                    from__patch_addr);
      return;
   }

   TTEntry* from_tte = index_tte(from_sNo, from_tteNo);

   /* Get VEX to do the patching itself.  We have to hand it off
      since it is host-dependent. */
   VexInvalRange vir
      = LibVEX_Chain(
           arch_host, endness_host,
           from__patch_addr,
           VG_(fnptr_to_fnentry)(
              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                        : &VG_(disp_cp_chain_me_to_slowEP)),
           (void*)host_code
        );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );

   /* Now do the tricky bit -- update the ch_succs and ch_preds info
      for the two translations involved, so we can undo the chaining
      later, which we will have to do if the to_ block gets removed
      for whatever reason. */

   /* This is the new from_ -> to_ link to add. */
   InEdge ie;
   InEdge__init(&ie);
   ie.from_sNo   = from_sNo;
   ie.from_tteNo = from_tteNo;
   ie.to_fastEP  = to_fastEP;
   HWord from_offs = (HWord)( (UChar*)from__patch_addr
                              - (UChar*)from_tte->tcptr );
   vg_assert(from_offs < 100000/* let's say */);
   ie.from_offs  = (UInt)from_offs;

   /* This is the new to_ -> from_ backlink to add. */
   OutEdge oe;
   OutEdge__init(&oe);
   oe.to_sNo    = to_sNo;
   oe.to_tteNo  = to_tteNo;
   oe.from_offs = (UInt)from_offs;

   /* Add .. */
   InEdgeArr__add(&to_tte->in_edges, &ie);
   OutEdgeArr__add(&from_tte->out_edges, &oe);
}
/* Unchain one patch, as described by the specified InEdge.  For
   sanity check purposes only (to check that the patched location is
   as expected) it also requires the fast and slow entry point
   addresses of the destination block (that is, the block that owns
   this InEdge). */
__attribute__((noinline))
static void unchain_one ( VexArch arch_host, VexEndness endness_host,
                          InEdge* ie,
                          void* to_fastEPaddr, void* to_slowEPaddr )
{
   vg_assert(ie);
   TTEntry* tte
      = index_tte(ie->from_sNo, ie->from_tteNo);
   UChar* place_to_patch
      = ((UChar*)tte->tcptr) + ie->from_offs;
   UChar* disp_cp_chain_me
      = VG_(fnptr_to_fnentry)(
           ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                         : &VG_(disp_cp_chain_me_to_slowEP)
        );
   UChar* place_to_jump_to_EXPECTED
      = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;

   // stay sane: both src and dst for this unchaining are
   // in the main code cache
   vg_assert( is_in_the_main_TC(place_to_patch) ); // src
   vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
   // dst check is ok because LibVEX_UnChain checks that
   // place_to_jump_to_EXPECTED really is the current dst, and
   // asserts if it isn't.
   VexInvalRange vir
      = LibVEX_UnChain( arch_host, endness_host, place_to_patch,
                        place_to_jump_to_EXPECTED, disp_cp_chain_me );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );
}
/* The specified block is about to be deleted.  Update the preds and
   succs of its associated blocks accordingly.  This includes undoing
   any chained jumps to this block. */
static
void unchain_in_preparation_for_deletion ( VexArch arch_host,
                                           VexEndness endness_host,
                                           UInt here_sNo, UInt here_tteNo )
{
   if (DEBUG_TRANSTAB)
      VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo);
   UWord i, j, n, m;
   Int evCheckSzB = LibVEX_evCheckSzB(arch_host, endness_host);
   TTEntry* here_tte = index_tte(here_sNo, here_tteNo);
   if (DEBUG_TRANSTAB)
      VG_(printf)("... QQQ tt.entry 0x%llu tt.tcptr 0x%p\n",
                  here_tte->entry, here_tte->tcptr);
   vg_assert(here_tte->status == InUse);

   /* Visit all InEdges owned by here_tte. */
   n = InEdgeArr__size(&here_tte->in_edges);
   for (i = 0; i < n; i++) {
      InEdge* ie = InEdgeArr__index(&here_tte->in_edges, i);
      // Undo the chaining.
      UChar* here_slow_EP = (UChar*)here_tte->tcptr;
      UChar* here_fast_EP = here_slow_EP + evCheckSzB;
      unchain_one(arch_host, endness_host, ie, here_fast_EP, here_slow_EP);
      // Find the corresponding entry in the "from" node's out_edges,
      // and remove it.
      TTEntry* from_tte = index_tte(ie->from_sNo, ie->from_tteNo);
      m = OutEdgeArr__size(&from_tte->out_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         OutEdge* oe = OutEdgeArr__index(&from_tte->out_edges, j);
         if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
             && oe->from_offs == ie->from_offs)
            break;
      }
      vg_assert(j < m); // "oe must be findable"
      OutEdgeArr__deleteIndex(&from_tte->out_edges, j);
   }

   /* Visit all OutEdges owned by here_tte. */
   n = OutEdgeArr__size(&here_tte->out_edges);
   for (i = 0; i < n; i++) {
      OutEdge* oe = OutEdgeArr__index(&here_tte->out_edges, i);
      // Find the corresponding entry in the "to" node's in_edges,
      // and remove it.
      TTEntry* to_tte = index_tte(oe->to_sNo, oe->to_tteNo);
      m = InEdgeArr__size(&to_tte->in_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         InEdge* ie = InEdgeArr__index(&to_tte->in_edges, j);
         if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
             && ie->from_offs == oe->from_offs)
            break;
      }
      vg_assert(j < m); // "ie must be findable"
      InEdgeArr__deleteIndex(&to_tte->in_edges, j);
   }

   InEdgeArr__makeEmpty(&here_tte->in_edges);
   OutEdgeArr__makeEmpty(&here_tte->out_edges);
}
/*-------------------------------------------------------------*/
/*--- Address-range equivalence class stuff                 ---*/
/*-------------------------------------------------------------*/
/* Return equivalence class number for a range. */

static Int range_to_eclass ( Addr64 start, UInt len )
{
   UInt mask   = (1 << ECLASS_WIDTH) - 1;
   UInt lo     = (UInt)start;
   UInt hi     = lo + len - 1;
   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   if (loBits == hiBits) {
      vg_assert(loBits < ECLASS_N-1);
      return loBits;
   } else {
      return ECLASS_MISC;
   }
}
/* Calculates the equivalence class numbers for any VexGuestExtents.
   These are written in *eclasses, which must be big enough to hold 3
   Ints.  The number written, between 1 and 3, is returned.  The
   eclasses are presented in order, and any duplicates are removed. */

static
Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
                                  const VexGuestExtents* vge )
{
#  define SWAP(_lv1,_lv2) \
      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)

   Int i, j, n_ec, r;

   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   n_ec = 0;
   for (i = 0; i < vge->n_used; i++) {
      r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
      if (r == ECLASS_MISC)
         goto bad;
      /* only add if we haven't already seen it */
      for (j = 0; j < n_ec; j++)
         if (eclasses[j] == r)
            break;
      if (j == n_ec)
         eclasses[n_ec++] = r;
   }

   if (n_ec == 1)
      return 1;

   if (n_ec == 2) {
      /* sort */
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      return 2;
   }

   if (n_ec == 3) {
      /* sort */
      if (eclasses[0] > eclasses[2])
         SWAP(eclasses[0], eclasses[2]);
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      if (eclasses[1] > eclasses[2])
         SWAP(eclasses[1], eclasses[2]);
      return 3;
   }

   /*NOTREACHED*/
   vg_assert(0);

  bad:
   eclasses[0] = ECLASS_MISC;
   return 1;

#  undef SWAP
}
/* Add tteno to the set of entries listed for equivalence class ec in
   this sector.  Returns used location in eclass array. */

static
UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
{
   Int    old_sz, new_sz, i, r;
   UShort *old_ar, *new_ar;

   vg_assert(ec >= 0 && ec < ECLASS_N);
   vg_assert(tteno < N_TTES_PER_SECTOR);

   if (DEBUG_TRANSTAB) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);

   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {

      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);

      old_sz = sec->ec2tte_size[ec];
      old_ar = sec->ec2tte[ec];
      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
      new_ar = ttaux_malloc("transtab.aECN.1",
                            new_sz * sizeof(UShort));
      for (i = 0; i < old_sz; i++)
         new_ar[i] = old_ar[i];
      if (old_ar)
         ttaux_free(old_ar);
      sec->ec2tte_size[ec] = new_sz;
      sec->ec2tte[ec] = new_ar;

      if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   }

   /* Common case */
   r = sec->ec2tte_used[ec]++;
   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   sec->ec2tte[ec][r] = tteno;
   return (UInt)r;
}
/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   eclass entries to 'sec'. */

static
void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
{
   Int i, r, eclasses[3];
   TTEntry* tte;
   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);

   tte = &sec->tt[tteno];
   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );

   vg_assert(r >= 1 && r <= 3);
   tte->n_tte2ec = r;

   for (i = 0; i < r; i++) {
      tte->tte2ec_ec[i] = eclasses[i];
      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
   }
}
/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( const Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   const HChar* whassup = NULL;
   Int      i, j, k, n, ec_num, ec_idx;
   TTEntry* tte;
   UShort   tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->eclasses[0 .. tte->n_eclasses-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntrys
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntries they point at point
      back. */

   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {

      tte = &sec->tt[i];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_eclasses nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_eclasses out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->eclass[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->ec_idx[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != i)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
                     (Int)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}
/* Sanity check absolutely everything.  True == check passed. */

/* forwards */
static Bool sanity_check_redir_tt_tc ( void );

static Bool sanity_check_sector_search_order ( void )
{
   Int i, j, nListed;
   /* assert the array is the right size */
   vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order)
                               / sizeof(sector_search_order[0])));
   /* Check it's of the form  valid_sector_numbers ++ [-1, -1, ..] */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] < 0 || sector_search_order[i] >= n_sectors)
         break;
   }
   nListed = i;
   for (/* */; i < n_sectors; i++) {
      if (sector_search_order[i] != -1)
         break;
   }
   if (i != n_sectors)
      return False;
   /* Check each sector number only appears once */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] == -1)
         continue;
      for (j = i+1; j < n_sectors; j++) {
         if (sector_search_order[j] == sector_search_order[i])
            return False;
      }
   }
   /* Check that the number of listed sectors equals the number
      in use, by counting nListed back down. */
   for (i = 0; i < n_sectors; i++) {
      if (sectors[i].tc != NULL)
         nListed--;
   }
   if (nListed != 0)
      return False;
   return True;
}
static Bool sanity_check_all_sectors ( void )
{
   Int     sno;
   Bool    sane;
   Sector* sec;

   for (sno = 0; sno < n_sectors; sno++) {
      Int i;
      Int nr_not_dead_hx = 0;
      Int szhxa;
      sec = &sectors[sno];
      if (sec->tc == NULL)
         continue;
      sane = sanity_check_eclasses_in_sector( sec );
      if (!sane)
         return False;
      szhxa = VG_(sizeXA)(sec->host_extents);
      for (i = 0; i < szhxa; i++) {
         const HostExtent* hx = VG_(indexXA)(sec->host_extents, i);
         if (!HostExtent__is_dead (hx, sec))
            nr_not_dead_hx++;
      }
      if (nr_not_dead_hx != sec->tt_n_inuse) {
         VG_(debugLog)(0, "transtab",
                       "nr_not_dead_hx %d sanity fail (expected == in use %d)\n",
                       nr_not_dead_hx, sec->tt_n_inuse);
         return False;
      }
   }

   if ( !sanity_check_redir_tt_tc() )
      return False;
   if ( !sanity_check_sector_search_order() )
      return False;
   return True;
}
/*-------------------------------------------------------------*/
/*--- Add/find translations                                 ---*/
/*-------------------------------------------------------------*/

static UInt vge_osize ( const VexGuestExtents* vge )
{
   UInt i, n = 0;
   for (i = 0; i < vge->n_used; i++)
      n += (UInt)vge->len[i];
   return n;
}

static Bool isValidSector ( Int sector )
{
   if (sector < 0 || sector >= n_sectors)
      return False;
   return True;
}
static inline UInt HASH_TT ( Addr64 key )
{
   UInt kHi = (UInt)(key >> 32);
   UInt kLo = (UInt)key;
   UInt k32 = kHi ^ kLo;
   UInt ror = 7;
   if (ror > 0)
      k32 = (k32 >> ror) | (k32 << (32-ror));
   return k32 % N_TTES_PER_SECTOR;
}
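
/* A worked numeric example of the hash above (a note added for
   exposition): for guest address 0x0000000100002000, kHi = 0x1 and
   kLo = 0x2000, so k32 = 0x2001; rotating right by the 7 used above
   gives 0x02000040, and 0x02000040 % 65521 == 7744, the initial TT
   probe slot.  Folding in the high word and rotating mixes entropy
   from both halves of the address before the mod-prime reduction. */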
static void setFastCacheEntry ( Addr64 key, ULong* tcptr )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[cno].guest = (Addr)key;
   VG_(tt_fast)[cno].host  = (Addr)tcptr;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
}
/* Invalidate the fast cache VG_(tt_fast). */
static void invalidateFastCache ( void )
{
   UInt j;
   /* This loop is popular enough to make it worth unrolling a
      bit, at least on ppc32. */
   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   }

   vg_assert(j == VG_TT_FAST_SIZE);
   n_fast_flushes++;
}
static void initialiseSector ( Int sno )
{
   Int     i;
   SysRes  sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (i = 0; i < ECLASS_N; i++) {
         vg_assert(sec->ec2tte_size[i] == 0);
         vg_assert(sec->ec2tte_used[i] == 0);
         vg_assert(sec->ec2tte[i] == NULL);
      }
      vg_assert(sec->host_extents == NULL);

      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);
      if (VG_(clo_stats))
         VG_(dmsg)("transtab: " "allocate sector %d\n", sno);

      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(AddrH)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
         /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)(AddrH)sr_Res(sres);

      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Set up the host_extents array. */
      sec->host_extents
         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
                      ttaux_free,
                      sizeof(HostExtent));

      /* Add an entry in the sector_search_order */
      for (i = 0; i < n_sectors; i++) {
         if (sector_search_order[i] == -1)
            break;
      }
      vg_assert(i >= 0 && i < n_sectors);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
      if (VG_(clo_stats))
         VG_(dmsg)("transtab: " "recycle sector %d\n", sno);

      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      VexArch     arch_host = VexArch_INVALID;
      VexArchInfo archinfo_host;
      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
      VexEndness endness_host = archinfo_host.endness;

      /* Visit each just-about-to-be-abandoned translation. */
      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n",
                                      sno);
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sec->tt[i].status == InUse) {
            vg_assert(sec->tt[i].n_tte2ec >= 1);
            vg_assert(sec->tt[i].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[i].vge);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->tt[i].entry,
                              sec->tt[i].vge );
            }
            unchain_in_preparation_for_deletion(arch_host,
                                                endness_host, sno, i);
         } else {
            vg_assert(sec->tt[i].n_tte2ec == 0);
         }
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }
      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n",
                                      sno);

      /* Free up the eclass structures. */
      for (i = 0; i < ECLASS_N; i++) {
         if (sec->ec2tte_size[i] == 0) {
            vg_assert(sec->ec2tte_used[i] == 0);
            vg_assert(sec->ec2tte[i] == NULL);
         } else {
            vg_assert(sec->ec2tte[i] != NULL);
            ttaux_free(sec->ec2tte[i]);
            sec->ec2tte[i] = NULL;
            sec->ec2tte_size[i] = 0;
            sec->ec2tte_used[i] = 0;
         }
      }

      /* Empty out the host extents array. */
      vg_assert(sec->host_extents != NULL);
      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      for (i = 0; i < n_sectors; i++) {
         if (sector_search_order[i] == sno)
            break;
      }
      vg_assert(i >= 0 && i < n_sectors);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}
/* Add a translation of vge to TT/TC.  The translation is temporarily
   in code[0 .. code_len-1].

   pre: youngest_sector points to a valid (although possibly full)
        sector.
*/
void VG_(add_to_transtab)( const VexGuestExtents* vge,
                           Addr64           entry,
                           AddrH            code,
                           UInt             code_len,
                           Bool             is_self_checking,
                           Int              offs_profInc,
                           UInt             n_guest_instrs )
{
   Int    tcAvailQ, reqdQ, y, i;
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   /* Generally stay sane */
   vg_assert(n_guest_instrs < 200); /* it can be zero, tho */

   if (DEBUG_TRANSTAB)
      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d) ...\n",
                  entry, code_len);

   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector. */
   reqdQ = (code_len + 7) >> 3;

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      Int tt_loading_pct = (100 * sectors[y].tt_n_inuse)
                           / N_TTES_PER_SECTOR;
      Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ))
                           / tc_sector_szQ;
      VG_(debugLog)(1,"transtab",
                    "declare sector %d full "
                    "(TT loading %2d%%, TC loading %2d%%)\n",
                    y, tt_loading_pct, tc_loading_pct);
      if (VG_(clo_stats)) {
         VG_(dmsg)("transtab: "
                   "declare sector %d full "
                   "(TT loading %2d%%, TC loading %2d%%)\n",
                   y, tt_loading_pct, tc_loading_pct);
      }
      youngest_sector++;
      if (youngest_sector >= n_sectors)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure the new sector really does have space. */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   VG_(memcpy)(dstP, srcP, code_len);
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full. */
   i = HASH_TT(entry);
   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   while (True) {
      if (sectors[y].tt[i].status == Empty
          || sectors[y].tt[i].status == Deleted)
         break;
      i++;
      if (i >= N_TTES_PER_SECTOR)
         i = 0;
   }

   TTEntry__init(&sectors[y].tt[i]);
   sectors[y].tt[i].status = InUse;
   sectors[y].tt[i].tcptr  = tcptr;
   sectors[y].tt[i].count  = 0;
   sectors[y].tt[i].weight = n_guest_instrs == 0 ? 1 : n_guest_instrs;
   sectors[y].tt[i].vge    = *vge;
   sectors[y].tt[i].entry  = entry;

   /* Patch in the profile counter location, if necessary. */
   if (offs_profInc != -1) {
      vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
      VexArch     arch_host = VexArch_INVALID;
      VexArchInfo archinfo_host;
      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
      VexEndness endness_host = archinfo_host.endness;
      VexInvalRange vir
         = LibVEX_PatchProfInc( arch_host, endness_host,
                                dstP + offs_profInc,
                                &sectors[y].tt[i].count );
      VG_(invalidate_icache)( (void*)vir.start, vir.len );
   }

   VG_(invalidate_icache)( dstP, code_len );

   /* Add this entry to the host_extents map, checking that we're
      adding in order. */
   { HostExtent hx;
     hx.start = (UChar*)tcptr;
     hx.len   = code_len;
     hx.tteNo = i;
     vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
     XArray* hx_array = sectors[y].host_extents;
     vg_assert(hx_array);
     Word n = VG_(sizeXA)(hx_array);
     if (n > 0) {
        HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
        vg_assert(hx_prev->start + hx_prev->len <= hx.start);
     }
     VG_(addToXA)(hx_array, &hx);
     if (DEBUG_TRANSTAB)
        VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n",
                    hx.start, hx.len, y, i);
   }

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], i );
}
/* Search for the translation of the given guest address.  If
   requested, a successful search can also cause the fast-caches to be
   updated.
*/
Bool VG_(search_transtab) ( /*OUT*/AddrH* res_hcode,
                            /*OUT*/UInt*  res_sNo,
                            /*OUT*/UInt*  res_tteNo,
                            Addr64        guest_addr,
                            Bool          upd_cache )
{
   Int i, j, k, kstart, sno;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors and avoids multiple expensive % operations. */
   n_full_lookups++;
   k      = -1;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);

   /* Search in all the sectors, using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < n_sectors; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      k = kstart;
      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         if (sectors[sno].tt[k].status == InUse
             && sectors[sno].tt[k].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].tt[k].tcptr );
            if (res_hcode)
               *res_hcode = (AddrH)sectors[sno].tt[k].tcptr;
            if (res_sNo)
               *res_sNo = sno;
            if (res_tteNo)
               *res_tteNo = k;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         if (sectors[sno].tt[k].status == Empty)
            break; /* not found in this sector */
         k++;
         if (k == N_TTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}
/*-------------------------------------------------------------*/
/*--- Delete translations.                                  ---*/
/*-------------------------------------------------------------*/

/* forward */
static void unredir_discard_translations( Addr64, ULong );

/* Stuff for deleting translations which intersect with a given
   address range.  Unfortunately, to make this run at a reasonable
   speed, it is complex. */

static inline
Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
{
   Addr64 e1 = s1 + r1 - 1ULL;
   Addr64 e2 = s2 + r2 - 1ULL;
   if (e1 < s2 || e2 < s1)
      return False;
   return True;
}
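
/* A small worked example of the test above (a note added for
   exposition): ranges are closed intervals [s, s+r-1].  For
   s1 = 0x1000, r1 = 0x100 and s2 = 0x10FF, r2 = 0x10, we get
   e1 = 0x10FF and e2 = 0x110E; neither e1 < s2 nor e2 < s1 holds, so
   the single shared byte at 0x10FF correctly counts as an overlap. */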
static inline
Bool overlaps ( Addr64 start, ULong range, const VexGuestExtents* vge )
{
   if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
      return True;
   if (vge->n_used < 2)
      return False;
   if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
      return True;
   if (vge->n_used < 3)
      return False;
   if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
      return True;
   return False;
}
/* Delete a tt entry, and update all the eclass data accordingly. */

static void delete_tte ( /*MOD*/Sector* sec, UInt secNo, Int tteno,
                         VexArch arch_host, VexEndness endness_host )
{
   Int      i, ec_num, ec_idx;
   TTEntry* tte;

   /* sec and secNo are mutually redundant; cross-check. */
   vg_assert(sec == &sectors[secNo]);

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   unchain_in_preparation_for_deletion(arch_host, endness_host, secNo, tteno);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = (Int)tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry. */
   tte->status   = Deleted;
   tte->n_tte2ec = 0;

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VG_TDICT_CALL( tool_discard_superblock_info,
                     tte->entry,
                     tte->vge );
   }
}
/* Delete translations from sec which intersect specified range, but
   only consider translations in the specified eclass. */

static
Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, UInt secNo,
                                            Addr64 guest_start, ULong range,
                                            Int ec,
                                            VexArch arch_host,
                                            VexEndness endness_host )
{
   Int      i;
   UShort   tteno;
   Bool     anyDeld = False;
   TTEntry* tte;

   vg_assert(ec >= 0 && ec < ECLASS_N);

   for (i = 0; i < sec->ec2tte_used[ec]; i++) {

      tteno = sec->ec2tte[ec][i];
      if (tteno == EC2TTE_DELETED) {
         /* already deleted */
         continue;
      }

      vg_assert(tteno < N_TTES_PER_SECTOR);

      tte = &sec->tt[tteno];
      vg_assert(tte->status == InUse);

      if (overlaps( guest_start, range, &tte->vge )) {
         anyDeld = True;
         delete_tte( sec, secNo, (Int)tteno, arch_host, endness_host );
      }

   }

   return anyDeld;
}
/* Delete translations from sec which intersect specified range, the
   slow way, by inspecting all translations in sec. */

static
Bool delete_translations_in_sector ( /*MOD*/Sector* sec, UInt secNo,
                                     Addr64 guest_start, ULong range,
                                     VexArch arch_host,
                                     VexEndness endness_host )
{
   Int  i;
   Bool anyDeld = False;

   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
      if (sec->tt[i].status == InUse
          && overlaps( guest_start, range, &sec->tt[i].vge )) {
         anyDeld = True;
         delete_tte( sec, secNo, i, arch_host, endness_host );
      }
   }

   return anyDeld;
}
1904 void VG_(discard_translations
) ( Addr64 guest_start
, ULong range
,
1909 Bool anyDeleted
= False
;
1911 vg_assert(init_done
);
1913 VG_(debugLog
)(2, "transtab",
1914 "discard_translations(0x%llx, %lld) req by %s\n",
1915 guest_start
, range
, who
);
1917 /* Pre-deletion sanity check */
1918 if (VG_(clo_sanity_level
>= 4)) {
1919 Bool sane
= sanity_check_all_sectors();
1926 VexArch arch_host
= VexArch_INVALID
;
1927 VexArchInfo archinfo_host
;
1928 VG_(bzero_inline
)(&archinfo_host
, sizeof(archinfo_host
));
1929 VG_(machine_get_VexArchInfo
)( &arch_host
, &archinfo_host
);
1930 VexEndness endness_host
= archinfo_host
.endness
;
1932 /* There are two different ways to do this.
1934 If the range fits within a single address-range equivalence
1935 class, as will be the case for a cache line sized invalidation,
1936 then we only have to inspect the set of translations listed in
1937 that equivalence class, and also in the "sin-bin" equivalence
1940 Otherwise, the invalidation is of a larger range and probably
1941 results from munmap. In this case it's (probably!) faster just
1942 to inspect all translations, dump those we don't want, and
1943 regenerate the equivalence class information (since modifying it
1944 in-situ is even more expensive).
1947 /* First off, figure out if the range falls within a single class,
1948 and if so which one. */
1951 if (range
< (1ULL << ECLASS_SHIFT
))
1952 ec
= range_to_eclass( guest_start
, (UInt
)range
);
1954 /* if ec is ECLASS_MISC then we aren't looking at just a single
1955 class, so use the slow scheme. Else use the fast scheme,
1956 examining 'ec' and ECLASS_MISC. */
1958 if (ec
!= ECLASS_MISC
) {
1960 VG_(debugLog
)(2, "transtab",
1961 " FAST, ec = %d\n", ec
);
1964 vg_assert(ec
>= 0 && ec
< ECLASS_MISC
);
1966 for (sno
= 0; sno
< n_sectors
; sno
++) {
1967 sec
= §ors
[sno
];
1968 if (sec
->tc
== NULL
)
1970 anyDeleted
|= delete_translations_in_sector_eclass(
1971 sec
, sno
, guest_start
, range
, ec
,
1972 arch_host
, endness_host
1974 anyDeleted
|= delete_translations_in_sector_eclass(
1975 sec
, sno
, guest_start
, range
, ECLASS_MISC
,
1976 arch_host
, endness_host
      VG_(debugLog)(2, "transtab",
                       "  SLOW, ec = %d\n", ec);

      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector(
                          sec, sno, guest_start, range,
                          arch_host, endness_host );
      }

   }
   if (anyDeleted)
      invalidateFastCache();

   /* don't forget the no-redir cache */
   unredir_discard_translations( guest_start, range );
   /* Post-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      Int      i;
      TTEntry* tte;
      Bool     sane = sanity_check_all_sectors();
      vg_assert(sane);
      /* But now, also check the requested address range isn't
         present anywhere. */
      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
            tte = &sec->tt[i];
            if (tte->status != InUse)
               continue;
            vg_assert(!overlaps( guest_start, range, &tte->vge ));
         }
      }
   }
}
/*------------------------------------------------------------*/
/*--- AUXILIARY: the unredirected TT/TC                    ---*/
/*------------------------------------------------------------*/

/* A very simple translation cache which holds a small number of
   unredirected translations.  This is completely independent of the
   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
   both structures are simply dumped and we start over.

   Since these translations are unredirected, the search key is (by
   definition) the first address entry in the .vge field. */
/* Sized to hold 500 translations of average size 1000 bytes. */

#define UNREDIR_SZB   1000

#define N_UNREDIR_TT  500
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
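/* Worked sizing example, derived from the constants above: the
   backing store is N_UNREDIR_TT * UNREDIR_SZB = 500 * 1000 = 500000
   bytes (~488 KB), mmap'd once in init_unredir_tt_tc below.  Measured
   in 8-byte ULong units that is N_UNREDIR_TCQ = 500000 / 8 = 62500
   quadwords, the capacity limit against which unredir_tc_used is
   checked. */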
typedef
   struct {
      VexGuestExtents vge;
      Addr            hcode;
      Bool            inUse;
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
static ULong *unredir_tc;
static Int    unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;
static void init_unredir_tt_tc ( void )
{
   Int i;
   if (unredir_tc == NULL) {
      SysRes sres = VG_(am_mmap_anon_float_valgrind)
                       ( N_UNREDIR_TT * UNREDIR_SZB );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
                                     N_UNREDIR_TT * UNREDIR_SZB);
         /*NOTREACHED*/
      }
      unredir_tc = (ULong *)(AddrH)sr_Res(sres);
   }
   unredir_tc_used = 0;
   for (i = 0; i < N_UNREDIR_TT; i++)
      unredir_tt[i].inUse = False;
   unredir_tt_highwater = -1;
}
/* Do a sanity check; return False on failure. */
static Bool sanity_check_redir_tt_tc ( void )
{
   Int i;
   if (unredir_tt_highwater < -1) return False;
   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;

   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
      if (unredir_tt[i].inUse)
         return False;

   if (unredir_tc_used < 0) return False;
   if (unredir_tc_used > N_UNREDIR_TCQ) return False;

   return True;
}
/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   is temporarily in code[0 .. code_len-1].
*/
void VG_(add_to_unredir_transtab)( const VexGuestExtents* vge,
                                   Addr64 entry,
                                   AddrH  code,
                                   UInt   code_len )
{
   Int   i, j, code_szQ;
   HChar *srcP, *dstP;

   vg_assert(sanity_check_redir_tt_tc());

   /* This is the whole point: it's not redirected! */
   vg_assert(entry == vge->base[0]);

   /* How many unredir_tc quadword (ULong) slots are needed */
   code_szQ = (code_len + 7) / 8;

   /* Look for an empty unredir_tt slot */
   for (i = 0; i < N_UNREDIR_TT; i++)
      if (!unredir_tt[i].inUse)
         break;

   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
      /* It's full; dump everything we currently have */
      init_unredir_tt_tc();
      i = 0;
   }

   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(code_szQ > 0);
   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(i >= 0 && i < N_UNREDIR_TT);
   vg_assert(unredir_tt[i].inUse == False);

   if (i > unredir_tt_highwater)
      unredir_tt_highwater = i;

   dstP = (HChar*)&unredir_tc[unredir_tc_used];
   srcP = (HChar*)code;
   for (j = 0; j < code_len; j++)
      dstP[j] = srcP[j];

   VG_(invalidate_icache)( dstP, code_len );

   unredir_tt[i].inUse = True;
   unredir_tt[i].vge   = *vge;
   unredir_tt[i].hcode = (Addr)dstP;

   unredir_tc_used += code_szQ;
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);

   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
}
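/* Worked example of the rounding arithmetic above (input size assumed
   for illustration): a host-code sequence of code_len = 100 bytes
   gives code_szQ = (100 + 7) / 8 = 13 ULong slots, i.e. 104 bytes
   reserved in unredir_tc, wasting at most 7 bytes per translation.
   The final vg_assert checks exactly this invariant: the copied bytes
   must not extend past the reserved quadwords. */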
Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
                                    Addr64 guest_addr )
{
   Int i;
   for (i = 0; i < N_UNREDIR_TT; i++) {
      if (!unredir_tt[i].inUse)
         continue;
      if (unredir_tt[i].vge.base[0] == guest_addr) {
         *result = (AddrH)unredir_tt[i].hcode;
         return True;
      }
   }
   return False;
}
static void unredir_discard_translations( Addr64 guest_start, ULong range )
{
   Int i;

   vg_assert(sanity_check_redir_tt_tc());

   for (i = 0; i <= unredir_tt_highwater; i++) {
      if (unredir_tt[i].inUse
          && overlaps( guest_start, range, &unredir_tt[i].vge ))
         unredir_tt[i].inUse = False;
   }
}
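/* Design note: the scan above runs only up to unredir_tt_highwater
   rather than N_UNREDIR_TT.  That is safe because
   sanity_check_redir_tt_tc has just verified that every slot above
   the highwater mark is unused, so when only a few unredirected
   translations exist, an invalidation touches a handful of slots
   instead of all 500. */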
/*------------------------------------------------------------*/
/*--- Initialisation.                                      ---*/
/*------------------------------------------------------------*/

void VG_(init_tt_tc) ( void )
{
   Int i, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(Addr64) == 8);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no
      spaces */
   vg_assert(sizeof( VG_(tt_fast) )
             == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check the fast cache is aligned as we requested.  Not fatal if
      it isn't, but we might as well make sure. */
   vg_assert(VG_IS_16_ALIGNED( (Addr)&VG_(tt_fast)[0] ));
   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)\n");

   /* Figure out how big each tc area should be. */
   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
   vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR_USABLE);
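   /* Worked example of the sizing above, assuming a tool that
      declares avg_translation_sizeB == 172 (an arbitrary illustrative
      value; each tool supplies its own): avg_codeszQ = (172 + 7) / 8
      = 22 quadwords.  With N_TTES_PER_SECTOR_USABLE = (65521 * 65) /
      100 = 42588, that gives tc_sector_szQ = 42588 * (1 + 22) =
      979524 quadwords, i.e. about 7.5 MB of translation cache per
      sector.  Both sanity asserts above then hold:
      2 * 42588 <= 979524 <= 100 * 42588. */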
   n_sectors = VG_(clo_num_transtab_sectors);
   vg_assert(n_sectors >= MIN_N_SECTORS);
   vg_assert(n_sectors <= MAX_N_SECTORS);
   /* Initialise the sectors, even the ones we aren't going to use.
      Set all fields to zero. */
   youngest_sector = 0;
   for (i = 0; i < MAX_N_SECTORS; i++)
      VG_(memset)(&sectors[i], 0, sizeof(sectors[i]));

   /* Initialise the sector_search_order hint table, including the
      entries we aren't going to use. */
   for (i = 0; i < MAX_N_SECTORS; i++)
      sector_search_order[i] = -1;

   /* Initialise the fast cache. */
   invalidateFastCache();

   /* and the unredir tt/tc */
   init_unredir_tt_tc();
   if (VG_(clo_verbosity) > 2 || VG_(clo_stats)
       || VG_(debugLog_getLevel) () >= 2) {
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
          n_sectors, 8 * tc_sector_szQ,
          n_sectors * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d tables of %d bytes each = %d total\n",
          n_sectors, (int)(N_TTES_PER_SECTOR * sizeof(TTEntry)),
          (int)(n_sectors * N_TTES_PER_SECTOR * sizeof(TTEntry)));
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d entries each = %d total entries"
         " max occupancy %d (%d%%)\n",
          N_TTES_PER_SECTOR,
          n_sectors * N_TTES_PER_SECTOR,
          n_sectors * N_TTES_PER_SECTOR_USABLE,
          SECTOR_TT_LIMIT_PERCENT );
   }
}
/*------------------------------------------------------------*/
/*--- Printing out statistics.                             ---*/
/*------------------------------------------------------------*/

static ULong safe_idiv( ULong a, ULong b )
{
   return (b == 0 ? 0 : a / b);
}
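/* Example of how the ratio printed below reads (counts assumed, not
   real measurements): with n_in_osize = 1000 guest bytes translated
   into n_in_tsize = 4500 host bytes, safe_idiv(10*4500, 1000) = 45 is
   printed as "ratio 45:10", i.e. a 4.5x code expansion.  The guarded
   division also keeps a zero n_in_osize (nothing translated yet) from
   faulting. */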
UInt VG_(get_bbs_translated) ( void )
{
   return n_in_count;
}
void VG_(print_tt_tc_stats) ( void )
{
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
      n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
      n_fast_updates, n_fast_flushes );

   VG_(message)(Vg_DebugMsg,
                " transtab: new        %'lld "
                "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
                n_in_count, n_in_osize, n_in_tsize,
                safe_idiv(10*n_in_tsize, n_in_osize),
                n_in_sc_count );
   VG_(message)(Vg_DebugMsg,
                " transtab: dumped     %'llu (%'llu -> ?" "?)\n",
                n_dump_count, n_dump_osize );
   VG_(message)(Vg_DebugMsg,
                " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
                n_disc_count, n_disc_osize );
   if (DEBUG_TRANSTAB) {
      Int i;
      VG_(printf)("\n");
      for (i = 0; i < ECLASS_N; i++) {
         VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
         if (i % 16 == 15)
            VG_(printf)("\n");
      }
      VG_(printf)("\n\n");
   }
}
/*------------------------------------------------------------*/
/*--- Printing out of profiling results.                   ---*/
/*------------------------------------------------------------*/

static ULong score ( const TTEntry* tte )
{
   return ((ULong)tte->weight) * ((ULong)tte->count);
}
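/* Example (numbers assumed): a superblock with weight 20 that has
   been executed count = 1000000 times scores 20 * 1000000 =
   20000000.  Doing the multiply in ULong keeps the product from
   overflowing when both fields carry large 32-bit values. */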
ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops )
{
   Int   sno, i, r, s;
   ULong score_total;

   /* First, compute the total weighted count, and find the top N
      ttes.  tops contains pointers to the most-used n_tops blocks, in
      descending order (viz, tops[0] is the highest scorer). */
   for (i = 0; i < n_tops; i++) {
      tops[i].addr  = 0;
      tops[i].score = 0;
   }

   score_total = 0;

   for (sno = 0; sno < n_sectors; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         score_total += score(&sectors[sno].tt[i]);
         /* Find the rank for sectors[sno].tt[i]. */
         r = n_tops-1;
         while (True) {
            /* Scan upwards until we're either beyond the start
               (r == -1) or tops[r] is occupied by a higher scorer. */
            if (r == -1)
               break;
            if (tops[r].addr == 0) {
               r--;
               continue;
            }
            if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
               r--;
               continue;
            }
            break;
         }
         r++;
         vg_assert(r >= 0 && r <= n_tops);
         /* This bb should be placed at r, and bbs above it shifted
            upwards one slot. */
         if (r < n_tops) {
            for (s = n_tops-1; s > r; s--)
               tops[s] = tops[s-1];
            tops[r].addr  = sectors[sno].tt[i].entry;
            tops[r].score = score( &sectors[sno].tt[i] );
         }
      }
   }

   /* Now zero out all the counter fields, so that we can make
      multiple calls here and just get the values since the last call,
      each time, rather than values accumulated for the whole run. */
   for (sno = 0; sno < n_sectors; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         sectors[sno].tt[i].count = 0;
      }
   }

   return score_total;
}
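/* Illustrative walk-through of the ranking logic above (scores
   assumed): with n_tops = 3 and tops holding scores {90, 40, 10}, a
   new block scoring 55 scans upwards from r = 2 past the 10 and the
   40, stops below the 90, and is inserted at r = 1; the 40 and 10
   shift up one slot and the 10 falls off the end, leaving
   {90, 55, 40}.  A block scoring 5 ends with r = 3 == n_tops and is
   dropped. */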
/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/