New Option --avg-transtab-entry-size=<number> can be used to tune
[valgrind.git] / coregrind / m_transtab.c
blobf28c3f338bffe6fbf0ab84d250cf9efa8b669383
2 /*--------------------------------------------------------------------*/
3 /*--- Management of the translation table and cache. ---*/
4 /*--- m_transtab.c ---*/
5 /*--------------------------------------------------------------------*/
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
11 Copyright (C) 2000-2013 Julian Seward
12 jseward@acm.org
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
29 The GNU General Public License is contained in the file COPYING.
32 #include "pub_core_basics.h"
33 #include "pub_core_debuglog.h"
34 #include "pub_core_machine.h" // For VG_(machine_get_VexArchInfo)
35 #include "pub_core_libcbase.h"
36 #include "pub_core_vki.h" // to keep pub_core_libproc.h happy, sigh
37 #include "pub_core_libcproc.h" // VG_(invalidate_icache)
38 #include "pub_core_libcassert.h"
39 #include "pub_core_libcprint.h"
40 #include "pub_core_options.h"
41 #include "pub_core_tooliface.h" // For VG_(details).avg_translation_sizeB
42 #include "pub_core_transtab.h"
43 #include "pub_core_aspacemgr.h"
44 #include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
45 #include "pub_core_xarray.h"
46 #include "pub_core_dispatch.h" // For VG_(disp_cp*) addresses
49 #define DEBUG_TRANSTAB 0
52 /*-------------------------------------------------------------*/
53 /*--- Management of the FIFO-based translation table+cache. ---*/
54 /*-------------------------------------------------------------*/
56 /* Nr of sectors provided via command line parameter. */
57 UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT;
58 /* Nr of sectors.
59 Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */
60 static UInt n_sectors = 0;
62 /* Average size of a transtab code entry. 0 means to use the tool
63 provided default. */
64 UInt VG_(clo_avg_transtab_entry_size) = 0;
66 /*------------------ CONSTANTS ------------------*/
67 /* Number of TC entries in each sector. This needs to be a prime
68 number to work properly, it must be <= 65535 (so that a TT index
69 fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED) to denote
70 'deleted') and it is strongly recommended not to change this.
71 65521 is the largest prime <= 65535. */
72 #define N_TTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521
74 /* Because each sector contains a hash table of TTEntries, we need to
75 specify the maximum allowable loading, after which the sector is
76 deemed full. */
77 #define SECTOR_TT_LIMIT_PERCENT 65
79 /* The sector is deemed full when this many entries are in it. */
80 #define N_TTES_PER_SECTOR_USABLE \
81 ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
83 /* Equivalence classes for fast address range deletion. There are 1 +
84 2^ECLASS_WIDTH bins. The highest one, ECLASS_MISC, describes an
85 address range which does not fall cleanly within any specific bin.
86 Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
87 #define ECLASS_SHIFT 11
88 #define ECLASS_WIDTH 8
89 #define ECLASS_MISC (1 << ECLASS_WIDTH)
90 #define ECLASS_N (1 + ECLASS_MISC)
92 #define EC2TTE_DELETED 0xFFFF /* 16-bit special value */
95 /*------------------ TYPES ------------------*/
97 /* In edges ("to-me") in the graph created by chaining. */
98 typedef
99 struct {
100 UInt from_sNo; /* sector number */
101 UInt from_tteNo; /* TTE number in given sector */
102 UInt from_offs; /* code offset from TCEntry::tcptr where the patch is */
103 Bool to_fastEP; /* Is the patch to a fast or slow entry point? */
105 InEdge;
108 /* Out edges ("from-me") in the graph created by chaining. */
109 typedef
110 struct {
111 UInt to_sNo; /* sector number */
112 UInt to_tteNo; /* TTE number in given sector */
113 UInt from_offs; /* code offset in owning translation where patch is */
115 OutEdge;
118 #define N_FIXED_IN_EDGE_ARR 3
119 typedef
120 struct {
121 UInt n_fixed; /* 0 .. N_FIXED_IN_EDGE_ARR */
122 InEdge fixed[N_FIXED_IN_EDGE_ARR];
123 XArray* var; /* XArray* of InEdgeArr */
125 InEdgeArr;
127 #define N_FIXED_OUT_EDGE_ARR 2
128 typedef
129 struct {
130 UInt n_fixed; /* 0 .. N_FIXED_OUT_EDGE_ARR */
131 OutEdge fixed[N_FIXED_OUT_EDGE_ARR];
132 XArray* var; /* XArray* of OutEdgeArr */
134 OutEdgeArr;
137 /* A translation-table entry. This indicates precisely which areas of
138 guest code are included in the translation, and contains all other
139 auxiliary info too. */
140 typedef
141 struct {
142 /* Profiling only: the count and weight (arbitrary meaning) for
143 this translation. Weight is a property of the translation
144 itself and computed once when the translation is created.
145 Count is an entry count for the translation and is
146 incremented by 1 every time the translation is used, if we
147 are profiling. */
148 ULong count;
149 UShort weight;
151 /* Status of the slot. Note, we need to be able to do lazy
152 deletion, hence the Deleted state. */
153 enum { InUse, Deleted, Empty } status;
155 /* 64-bit aligned pointer to one or more 64-bit words containing
156 the corresponding host code (must be in the same sector!)
157 This is a pointer into the sector's tc (code) area. */
158 ULong* tcptr;
160 /* This is the original guest address that purportedly is the
161 entry point of the translation. You might think that .entry
162 should be the same as .vge->base[0], and most of the time it
163 is. However, when doing redirections, that is not the case.
164 .vge must always correctly describe the guest code sections
165 from which this translation was made. However, .entry may or
166 may not be a lie, depending on whether or not we're doing
167 redirection. */
168 Addr entry;
170 /* This structure describes precisely what ranges of guest code
171 the translation covers, so we can decide whether or not to
172 delete it when translations of a given address range are
173 invalidated. */
174 VexGuestExtents vge;
176 /* Address range summary info: these are pointers back to
177 eclass[] entries in the containing Sector. Those entries in
178 turn point back here -- the two structures are mutually
179 redundant but both necessary to make fast deletions work.
180 The eclass info is similar to, and derived from, this entry's
181 'vge' field, but it is not the same */
182 UShort n_tte2ec; // # tte2ec pointers (1 to 3)
183 UShort tte2ec_ec[3]; // for each, the eclass #
184 UInt tte2ec_ix[3]; // and the index within the eclass.
185 // for i in 0 .. n_tte2ec-1
186 // sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
187 // should be the index
188 // of this TTEntry in the containing Sector's tt array.
190 /* Admin information for chaining. 'in_edges' is a set of the
191 patch points which jump to this translation -- hence are
192 predecessors in the control flow graph. 'out_edges' points
193 to successors in the control flow graph -- translations to
194 which this one has a patched jump. In short these are just
195 backwards and forwards edges in the graph of patched-together
196 blocks. The 'in_edges' contain slightly more info, enough
197 that we can undo the chaining of each mentioned patch point.
198 The 'out_edges' list exists only so that we can visit the
199 'in_edges' entries of all blocks we're patched through to, in
200 order to remove ourselves from then when we're deleted. */
202 /* A translation can disappear for two reasons:
203 1. erased (as part of the oldest sector cleanup) when the
204 youngest sector is full.
205 2. discarded due to calls to VG_(discard_translations).
206 VG_(discard_translations) sets the status of the
207 translation to 'Deleted'.
208 A.o., the gdbserver discards one or more translations
209 when a breakpoint is inserted or removed at an Addr,
210 or when single stepping mode is enabled/disabled
211 or when a translation is instrumented for gdbserver
212 (all the target jumps of this translation are
213 invalidated).
215 So, it is possible that the translation A to be patched
216 (to obtain a patched jump from A to B) is invalidated
217 after B is translated and before A is patched.
218 In case a translation is erased or discarded, the patching
219 cannot be done. VG_(tt_tc_do_chaining) and find_TTEntry_from_hcode
220 are checking the 'from' translation still exists before
221 doing the patching.
223 Is it safe to erase or discard the current translation E being
224 executed ? Amazing, but yes, it is safe.
225 Here is the explanation:
227 The translation E being executed can only be erased if a new
228 translation N is being done. A new translation is done only
229 if the host addr is a not yet patched jump to another
230 translation. In such a case, the guest address of N is
231 assigned to the PC in the VEX state. Control is returned
232 to the scheduler. N will be translated. This can erase the
233 translation E (in case of sector full). VG_(tt_tc_do_chaining)
234 will not do the chaining to a non found translation E.
235 The execution will continue at the current guest PC
236 (i.e. the translation N).
237 => it is safe to erase the current translation being executed.
239 The current translation E being executed can also be discarded
240 (e.g. by gdbserver). VG_(discard_translations) will mark
241 this translation E as Deleted, but the translation itself
242 is not erased. In particular, its host code can only
243 be overwritten or erased in case a new translation is done.
244 A new translation will only be done if a not yet translated
245 jump is to be executed. The execution of the Deleted translation
246 E will continue till a non patched jump is encountered.
247 This situation is then similar to the 'erasing' case above :
248 the current translation E can be erased or overwritten, as the
249 execution will continue at the new translation N.
253 /* It is possible, although very unlikely, that a block A has
254 more than one patched jump to block B. This could happen if
255 (eg) A finishes "jcond B; jmp B".
257 This means in turn that B's in_edges set can list A more than
258 once (twice in this example). However, each such entry must
259 have a different from_offs, since a patched jump can only
260 jump to one place at once (it's meaningless for it to have
261 multiple destinations.) IOW, the successor and predecessor
262 edges in the graph are not uniquely determined by a
263 TTEntry --> TTEntry pair, but rather by a
264 (TTEntry,offset) --> TTEntry triple.
266 If A has multiple edges to B then B will mention A multiple
267 times in its in_edges. To make things simpler, we then
268 require that A mentions B exactly the same number of times in
269 its out_edges. Furthermore, a matching out-in pair must have
270 the same offset (from_offs). This facilitates sanity
271 checking, and it facilitates establishing the invariant that
272 a out_edges set may not have duplicates when using the
273 equality defined by (TTEntry,offset). Hence the out_edges
274 and in_edges sets really do have both have set semantics.
276 eg if A has been patched to B at offsets 42 and 87 (in A)
277 then A.out_edges = { (B,42), (B,87) } (in any order)
278 and B.in_edges = { (A,42), (A,87) } (in any order)
280 Hence for each node pair P->Q in the graph, there's a 1:1
281 mapping between P.out_edges and Q.in_edges.
283 InEdgeArr in_edges;
284 OutEdgeArr out_edges;
286 TTEntry;
289 /* A structure used for mapping host code addresses back to the
290 relevant TTEntry. Used when doing chaining, for finding the
291 TTEntry to which some arbitrary patch address belongs. */
292 typedef
293 struct {
294 UChar* start;
295 UInt len;
296 UInt tteNo;
298 HostExtent;
300 /* Finally, a sector itself. Each sector contains an array of
301 TCEntries, which hold code, and an array of TTEntries, containing
302 all required administrative info. Profiling is supported using the
303 TTEntry .count and .weight fields, if required.
305 If the sector is not in use, all three pointers are NULL and
306 tt_n_inuse is zero.
308 typedef
309 struct {
310 /* The TCEntry area. Size of this depends on the average
311 translation size. We try and size it so it becomes full
312 precisely when this sector's translation table (tt) reaches
313 its load limit (SECTOR_TT_LIMIT_PERCENT). */
314 ULong* tc;
316 /* The TTEntry array. This is a fixed size, always containing
317 exactly N_TTES_PER_SECTOR entries. */
318 TTEntry* tt;
320 /* This points to the current allocation point in tc. */
321 ULong* tc_next;
323 /* The count of tt entries with state InUse. */
324 Int tt_n_inuse;
326 /* Expandable arrays of tt indices for each of the ECLASS_N
327 address range equivalence classes. These hold indices into
328 the containing sector's tt array, which in turn should point
329 back here. */
330 Int ec2tte_size[ECLASS_N];
331 Int ec2tte_used[ECLASS_N];
332 UShort* ec2tte[ECLASS_N];
334 /* The host extents. The [start, +len) ranges are constructed
335 in strictly non-overlapping order, so we can binary search
336 them at any time. */
337 XArray* host_extents; /* XArray* of HostExtent */
339 Sector;
342 /*------------------ DECLS ------------------*/
344 /* The root data structure is an array of sectors. The index of the
345 youngest sector is recorded, and new translations are put into that
346 sector. When it fills up, we move along to the next sector and
347 start to fill that up, wrapping around at the end of the array.
348 That way, once all N_TC_SECTORS have been bought into use for the
349 first time, and are full, we then re-use the oldest sector,
350 endlessly.
352 When running, youngest sector should be between >= 0 and <
353 N_TC_SECTORS. The initial -1 value indicates the TT/TC system is
354 not yet initialised.
356 static Sector sectors[MAX_N_SECTORS];
357 static Int youngest_sector = -1;
359 /* The number of ULongs in each TCEntry area. This is computed once
360 at startup and does not change. */
361 static Int tc_sector_szQ = 0;
364 /* A list of sector numbers, in the order which they should be
365 searched to find translations. This is an optimisation to be used
366 when searching for translations and should not affect
367 correctness. -1 denotes "no entry". */
368 static Int sector_search_order[MAX_N_SECTORS];
371 /* Fast helper for the TC. A direct-mapped cache which holds a set of
372 recently used (guest address, host address) pairs. This array is
373 referred to directly from m_dispatch/dispatch-<platform>.S.
375 Entries in tt_fast may refer to any valid TC entry, regardless of
376 which sector it's in. Consequently we must be very careful to
377 invalidate this cache when TC entries are changed or disappear.
379 A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be
380 pointed at to cause that cache entry to miss. This relies on the
381 assumption that no guest code actually has that address, hence a
382 value 0x1 seems good. m_translate gives the client a synthetic
383 segfault if it tries to execute at this address.
386 typedef
387 struct {
388 Addr guest;
389 Addr host;
391 FastCacheEntry;
393 /*global*/ __attribute__((aligned(16)))
394 FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
396 /* Make sure we're not used before initialisation. */
397 static Bool init_done = False;
400 /*------------------ STATS DECLS ------------------*/
402 /* Number of fast-cache updates and flushes done. */
403 static ULong n_fast_flushes = 0;
404 static ULong n_fast_updates = 0;
406 /* Number of full lookups done. */
407 static ULong n_full_lookups = 0;
408 static ULong n_lookup_probes = 0;
410 /* Number/osize/tsize of translations entered; also the number of
411 those for which self-checking was requested. */
412 static ULong n_in_count = 0;
413 static ULong n_in_osize = 0;
414 static ULong n_in_tsize = 0;
415 static ULong n_in_sc_count = 0;
417 /* Number/osize of translations discarded due to lack of space. */
418 static ULong n_dump_count = 0;
419 static ULong n_dump_osize = 0;
420 static ULong n_sectors_recycled = 0;
422 /* Number/osize of translations discarded due to requests to do so. */
423 static ULong n_disc_count = 0;
424 static ULong n_disc_osize = 0;
427 /*-------------------------------------------------------------*/
428 /*--- Misc ---*/
429 /*-------------------------------------------------------------*/
431 static void* ttaux_malloc ( const HChar* tag, SizeT n )
433 return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
436 static void ttaux_free ( void* p )
438 VG_(arena_free)(VG_AR_TTAUX, p);
442 /*-------------------------------------------------------------*/
443 /*--- Chaining support ---*/
444 /*-------------------------------------------------------------*/
446 static inline TTEntry* index_tte ( UInt sNo, UInt tteNo )
448 vg_assert(sNo < n_sectors);
449 vg_assert(tteNo < N_TTES_PER_SECTOR);
450 Sector* s = &sectors[sNo];
451 vg_assert(s->tt);
452 TTEntry* tte = &s->tt[tteNo];
453 vg_assert(tte->status == InUse);
454 return tte;
457 static void InEdge__init ( InEdge* ie )
459 ie->from_sNo = -1; /* invalid */
460 ie->from_tteNo = 0;
461 ie->from_offs = 0;
462 ie->to_fastEP = False;
465 static void OutEdge__init ( OutEdge* oe )
467 oe->to_sNo = -1; /* invalid */
468 oe->to_tteNo = 0;
469 oe->from_offs = 0;
472 static void TTEntry__init ( TTEntry* tte )
474 VG_(memset)(tte, 0, sizeof(*tte));
477 static UWord InEdgeArr__size ( const InEdgeArr* iea )
479 if (iea->var) {
480 vg_assert(iea->n_fixed == 0);
481 return VG_(sizeXA)(iea->var);
482 } else {
483 vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
484 return iea->n_fixed;
488 static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
490 if (iea->var) {
491 vg_assert(iea->n_fixed == 0);
492 VG_(deleteXA)(iea->var);
493 iea->var = NULL;
494 } else {
495 vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
496 iea->n_fixed = 0;
500 static
501 InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
503 if (iea->var) {
504 vg_assert(iea->n_fixed == 0);
505 return (InEdge*)VG_(indexXA)(iea->var, i);
506 } else {
507 vg_assert(i < iea->n_fixed);
508 return &iea->fixed[i];
512 static
513 void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
515 if (iea->var) {
516 vg_assert(iea->n_fixed == 0);
517 VG_(removeIndexXA)(iea->var, i);
518 } else {
519 vg_assert(i < iea->n_fixed);
520 for (; i+1 < iea->n_fixed; i++) {
521 iea->fixed[i] = iea->fixed[i+1];
523 iea->n_fixed--;
527 static
528 void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
530 if (iea->var) {
531 vg_assert(iea->n_fixed == 0);
532 VG_(addToXA)(iea->var, ie);
533 } else {
534 vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
535 if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
536 /* The fixed array is full, so we have to initialise an
537 XArray and copy the fixed array into it. */
538 iea->var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
539 ttaux_free,
540 sizeof(InEdge));
541 UWord i;
542 for (i = 0; i < iea->n_fixed; i++) {
543 VG_(addToXA)(iea->var, &iea->fixed[i]);
545 VG_(addToXA)(iea->var, ie);
546 iea->n_fixed = 0;
547 } else {
548 /* Just add to the fixed array. */
549 iea->fixed[iea->n_fixed++] = *ie;
554 static UWord OutEdgeArr__size ( const OutEdgeArr* oea )
556 if (oea->var) {
557 vg_assert(oea->n_fixed == 0);
558 return VG_(sizeXA)(oea->var);
559 } else {
560 vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
561 return oea->n_fixed;
565 static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
567 if (oea->var) {
568 vg_assert(oea->n_fixed == 0);
569 VG_(deleteXA)(oea->var);
570 oea->var = NULL;
571 } else {
572 vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
573 oea->n_fixed = 0;
577 static
578 OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
580 if (oea->var) {
581 vg_assert(oea->n_fixed == 0);
582 return (OutEdge*)VG_(indexXA)(oea->var, i);
583 } else {
584 vg_assert(i < oea->n_fixed);
585 return &oea->fixed[i];
589 static
590 void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
592 if (oea->var) {
593 vg_assert(oea->n_fixed == 0);
594 VG_(removeIndexXA)(oea->var, i);
595 } else {
596 vg_assert(i < oea->n_fixed);
597 for (; i+1 < oea->n_fixed; i++) {
598 oea->fixed[i] = oea->fixed[i+1];
600 oea->n_fixed--;
604 static
605 void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
607 if (oea->var) {
608 vg_assert(oea->n_fixed == 0);
609 VG_(addToXA)(oea->var, oe);
610 } else {
611 vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
612 if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
613 /* The fixed array is full, so we have to initialise an
614 XArray and copy the fixed array into it. */
615 oea->var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
616 ttaux_free,
617 sizeof(OutEdge));
618 UWord i;
619 for (i = 0; i < oea->n_fixed; i++) {
620 VG_(addToXA)(oea->var, &oea->fixed[i]);
622 VG_(addToXA)(oea->var, oe);
623 oea->n_fixed = 0;
624 } else {
625 /* Just add to the fixed array. */
626 oea->fixed[oea->n_fixed++] = *oe;
631 static
632 Int HostExtent__cmpOrd ( const void* v1, const void* v2 )
634 const HostExtent* hx1 = v1;
635 const HostExtent* hx2 = v2;
636 if (hx1->start + hx1->len <= hx2->start) return -1;
637 if (hx2->start + hx2->len <= hx1->start) return 1;
638 return 0; /* partial overlap */
641 /* True if hx is a dead host extent, i.e. corresponds to host code
642 of an entry that was invalidated. */
643 static
644 Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec)
646 const UInt tteNo = hx->tteNo;
647 #define LDEBUG(m) if (DEBUG_TRANSTAB) \
648 VG_(printf) (m \
649 " start 0x%p len %u sector %d ttslot %u" \
650 " tt.entry 0x%lu tt.tcptr 0x%p\n", \
651 hx->start, hx->len, (int)(sec - sectors), \
652 hx->tteNo, \
653 sec->tt[tteNo].entry, sec->tt[tteNo].tcptr)
655 /* Entry might have been invalidated and not re-used yet.*/
656 if (sec->tt[tteNo].status == Deleted) {
657 LDEBUG("found deleted entry");
658 return True;
660 /* Maybe we found this entry via a host_extents which was
661 inserted for an entry which was changed to Deleted then
662 re-used after. If this entry was re-used, then its tcptr
663 is >= to host_extents start (i.e. the previous tcptr) + len.
664 This is the case as there is no re-use of host code: a new
665 entry or re-used entry always gets "higher value" host code. */
666 if ((UChar*) sec->tt[tteNo].tcptr >= hx->start + hx->len) {
667 LDEBUG("found re-used entry");
668 return True;
671 return False;
672 #undef LDEBUG
675 static __attribute__((noinline))
676 Bool find_TTEntry_from_hcode( /*OUT*/UInt* from_sNo,
677 /*OUT*/UInt* from_tteNo,
678 void* hcode )
680 Int i;
682 /* Search order logic copied from VG_(search_transtab). */
683 for (i = 0; i < n_sectors; i++) {
684 Int sno = sector_search_order[i];
685 if (UNLIKELY(sno == -1))
686 return False; /* run out of sectors to search */
688 const Sector* sec = &sectors[sno];
689 const XArray* /* of HostExtent */ host_extents = sec->host_extents;
690 vg_assert(host_extents);
692 HostExtent key;
693 VG_(memset)(&key, 0, sizeof(key));
694 key.start = hcode;
695 key.len = 1;
696 Word firstW = -1, lastW = -1;
697 Bool found = VG_(lookupXA_UNSAFE)(
698 host_extents, &key, &firstW, &lastW,
699 HostExtent__cmpOrd );
700 vg_assert(firstW == lastW); // always true, even if not found
701 if (found) {
702 HostExtent* hx = VG_(indexXA)(host_extents, firstW);
703 UInt tteNo = hx->tteNo;
704 /* Do some additional sanity checks. */
705 vg_assert(tteNo <= N_TTES_PER_SECTOR);
707 /* if this hx entry corresponds to dead host code, we must
708 tell this code has not been found, as it cannot be patched. */
709 if (HostExtent__is_dead (hx, sec))
710 return False;
712 vg_assert(sec->tt[tteNo].status == InUse);
713 /* Can only half check that the found TTEntry contains hcode,
714 due to not having a length value for the hcode in the
715 TTEntry. */
716 vg_assert((UChar*)sec->tt[tteNo].tcptr <= (UChar*)hcode);
717 /* Looks plausible */
718 *from_sNo = sno;
719 *from_tteNo = (UInt)tteNo;
720 return True;
723 return False;
727 /* Figure out whether or not hcode is jitted code present in the main
728 code cache (but not in the no-redir cache). Used for sanity
729 checking. */
730 static Bool is_in_the_main_TC ( const void* hcode )
732 Int i, sno;
733 for (i = 0; i < n_sectors; i++) {
734 sno = sector_search_order[i];
735 if (sno == -1)
736 break; /* run out of sectors to search */
737 if ((const UChar*)hcode >= (const UChar*)sectors[sno].tc
738 && (const UChar*)hcode <= (const UChar*)sectors[sno].tc_next
739 + sizeof(ULong) - 1)
740 return True;
742 return False;
746 /* Fulfill a chaining request, and record admin info so we
747 can undo it later, if required.
749 void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
750 UInt to_sNo,
751 UInt to_tteNo,
752 Bool to_fastEP )
754 /* Get the CPU info established at startup. */
755 VexArch arch_host = VexArch_INVALID;
756 VexArchInfo archinfo_host;
757 VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
758 VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
759 VexEndness endness_host = archinfo_host.endness;
761 // host_code is where we're patching to. So it needs to
762 // take into account, whether we're jumping to the slow
763 // or fast entry point. By definition, the fast entry point
764 // is exactly one event check's worth of code along from
765 // the slow (tcptr) entry point.
766 TTEntry* to_tte = index_tte(to_sNo, to_tteNo);
767 void* host_code = ((UChar*)to_tte->tcptr)
768 + (to_fastEP ? LibVEX_evCheckSzB(arch_host) : 0);
770 // stay sane -- the patch point (dst) is in this sector's code cache
771 vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
772 vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
773 + sizeof(ULong) - 1 );
775 /* Find the TTEntry for the from__ code. This isn't simple since
776 we only know the patch address, which is going to be somewhere
777 inside the from_ block. */
778 UInt from_sNo = (UInt)-1;
779 UInt from_tteNo = (UInt)-1;
780 Bool from_found
781 = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
782 from__patch_addr );
783 if (!from_found) {
784 // The from code might have been discarded due to sector re-use
785 // or marked Deleted due to translation invalidation.
786 // In such a case, don't do the chaining.
787 VG_(debugLog)(1,"transtab",
788 "host code %p not found (discarded? sector recycled?)"
789 " => no chaining done\n",
790 from__patch_addr);
791 return;
794 TTEntry* from_tte = index_tte(from_sNo, from_tteNo);
796 /* Get VEX to do the patching itself. We have to hand it off
797 since it is host-dependent. */
798 VexInvalRange vir
799 = LibVEX_Chain(
800 arch_host, endness_host,
801 from__patch_addr,
802 VG_(fnptr_to_fnentry)(
803 to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
804 : &VG_(disp_cp_chain_me_to_slowEP)),
805 (void*)host_code
807 VG_(invalidate_icache)( (void*)vir.start, vir.len );
809 /* Now do the tricky bit -- update the ch_succs and ch_preds info
810 for the two translations involved, so we can undo the chaining
811 later, which we will have to do if the to_ block gets removed
812 for whatever reason. */
814 /* This is the new from_ -> to_ link to add. */
815 InEdge ie;
816 InEdge__init(&ie);
817 ie.from_sNo = from_sNo;
818 ie.from_tteNo = from_tteNo;
819 ie.to_fastEP = to_fastEP;
820 HWord from_offs = (HWord)( (UChar*)from__patch_addr
821 - (UChar*)from_tte->tcptr );
822 vg_assert(from_offs < 100000/* let's say */);
823 ie.from_offs = (UInt)from_offs;
825 /* This is the new to_ -> from_ backlink to add. */
826 OutEdge oe;
827 OutEdge__init(&oe);
828 oe.to_sNo = to_sNo;
829 oe.to_tteNo = to_tteNo;
830 oe.from_offs = (UInt)from_offs;
832 /* Add .. */
833 InEdgeArr__add(&to_tte->in_edges, &ie);
834 OutEdgeArr__add(&from_tte->out_edges, &oe);
838 /* Unchain one patch, as described by the specified InEdge. For
839 sanity check purposes only (to check that the patched location is
840 as expected) it also requires the fast and slow entry point
841 addresses of the destination block (that is, the block that owns
842 this InEdge). */
843 __attribute__((noinline))
844 static void unchain_one ( VexArch arch_host, VexEndness endness_host,
845 InEdge* ie,
846 void* to_fastEPaddr, void* to_slowEPaddr )
848 vg_assert(ie);
849 TTEntry* tte
850 = index_tte(ie->from_sNo, ie->from_tteNo);
851 UChar* place_to_patch
852 = ((UChar*)tte->tcptr) + ie->from_offs;
853 UChar* disp_cp_chain_me
854 = VG_(fnptr_to_fnentry)(
855 ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
856 : &VG_(disp_cp_chain_me_to_slowEP)
858 UChar* place_to_jump_to_EXPECTED
859 = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;
861 // stay sane: both src and dst for this unchaining are
862 // in the main code cache
863 vg_assert( is_in_the_main_TC(place_to_patch) ); // src
864 vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
865 // dst check is ok because LibVEX_UnChain checks that
866 // place_to_jump_to_EXPECTED really is the current dst, and
867 // asserts if it isn't.
868 VexInvalRange vir
869 = LibVEX_UnChain( arch_host, endness_host, place_to_patch,
870 place_to_jump_to_EXPECTED, disp_cp_chain_me );
871 VG_(invalidate_icache)( (void*)vir.start, vir.len );
875 /* The specified block is about to be deleted. Update the preds and
876 succs of its associated blocks accordingly. This includes undoing
877 any chained jumps to this block. */
878 static
879 void unchain_in_preparation_for_deletion ( VexArch arch_host,
880 VexEndness endness_host,
881 UInt here_sNo, UInt here_tteNo )
883 if (DEBUG_TRANSTAB)
884 VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo);
885 UWord i, j, n, m;
886 Int evCheckSzB = LibVEX_evCheckSzB(arch_host);
887 TTEntry* here_tte = index_tte(here_sNo, here_tteNo);
888 if (DEBUG_TRANSTAB)
889 VG_(printf)("... QQQ tt.entry 0x%lu tt.tcptr 0x%p\n",
890 here_tte->entry, here_tte->tcptr);
891 vg_assert(here_tte->status == InUse);
893 /* Visit all InEdges owned by here_tte. */
894 n = InEdgeArr__size(&here_tte->in_edges);
895 for (i = 0; i < n; i++) {
896 InEdge* ie = InEdgeArr__index(&here_tte->in_edges, i);
897 // Undo the chaining.
898 UChar* here_slow_EP = (UChar*)here_tte->tcptr;
899 UChar* here_fast_EP = here_slow_EP + evCheckSzB;
900 unchain_one(arch_host, endness_host, ie, here_fast_EP, here_slow_EP);
901 // Find the corresponding entry in the "from" node's out_edges,
902 // and remove it.
903 TTEntry* from_tte = index_tte(ie->from_sNo, ie->from_tteNo);
904 m = OutEdgeArr__size(&from_tte->out_edges);
905 vg_assert(m > 0); // it must have at least one entry
906 for (j = 0; j < m; j++) {
907 OutEdge* oe = OutEdgeArr__index(&from_tte->out_edges, j);
908 if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
909 && oe->from_offs == ie->from_offs)
910 break;
912 vg_assert(j < m); // "oe must be findable"
913 OutEdgeArr__deleteIndex(&from_tte->out_edges, j);
916 /* Visit all OutEdges owned by here_tte. */
917 n = OutEdgeArr__size(&here_tte->out_edges);
918 for (i = 0; i < n; i++) {
919 OutEdge* oe = OutEdgeArr__index(&here_tte->out_edges, i);
920 // Find the corresponding entry in the "to" node's in_edges,
921 // and remove it.
922 TTEntry* to_tte = index_tte(oe->to_sNo, oe->to_tteNo);
923 m = InEdgeArr__size(&to_tte->in_edges);
924 vg_assert(m > 0); // it must have at least one entry
925 for (j = 0; j < m; j++) {
926 InEdge* ie = InEdgeArr__index(&to_tte->in_edges, j);
927 if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
928 && ie->from_offs == oe->from_offs)
929 break;
931 vg_assert(j < m); // "ie must be findable"
932 InEdgeArr__deleteIndex(&to_tte->in_edges, j);
935 InEdgeArr__makeEmpty(&here_tte->in_edges);
936 OutEdgeArr__makeEmpty(&here_tte->out_edges);
940 /*-------------------------------------------------------------*/
941 /*--- Address-range equivalence class stuff ---*/
942 /*-------------------------------------------------------------*/
944 /* Return equivalence class number for a range. */
946 static Int range_to_eclass ( Addr start, UInt len )
948 UInt mask = (1 << ECLASS_WIDTH) - 1;
949 UInt lo = (UInt)start;
950 UInt hi = lo + len - 1;
951 UInt loBits = (lo >> ECLASS_SHIFT) & mask;
952 UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
953 if (loBits == hiBits) {
954 vg_assert(loBits < ECLASS_N-1);
955 return loBits;
956 } else {
957 return ECLASS_MISC;
962 /* Calculates the equivalence class numbers for any VexGuestExtent.
963 These are written in *eclasses, which must be big enough to hold 3
964 Ints. The number written, between 1 and 3, is returned. The
965 eclasses are presented in order, and any duplicates are removed.
968 static
969 Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
970 const VexGuestExtents* vge )
973 # define SWAP(_lv1,_lv2) \
974 do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)
976 Int i, j, n_ec, r;
978 vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
980 n_ec = 0;
981 for (i = 0; i < vge->n_used; i++) {
982 r = range_to_eclass( vge->base[i], vge->len[i] );
983 if (r == ECLASS_MISC)
984 goto bad;
985 /* only add if we haven't already seen it */
986 for (j = 0; j < n_ec; j++)
987 if (eclasses[j] == r)
988 break;
989 if (j == n_ec)
990 eclasses[n_ec++] = r;
993 if (n_ec == 1)
994 return 1;
996 if (n_ec == 2) {
997 /* sort */
998 if (eclasses[0] > eclasses[1])
999 SWAP(eclasses[0], eclasses[1]);
1000 return 2;
1003 if (n_ec == 3) {
1004 /* sort */
1005 if (eclasses[0] > eclasses[2])
1006 SWAP(eclasses[0], eclasses[2]);
1007 if (eclasses[0] > eclasses[1])
1008 SWAP(eclasses[0], eclasses[1]);
1009 if (eclasses[1] > eclasses[2])
1010 SWAP(eclasses[1], eclasses[2]);
1011 return 3;
1014 /* NOTREACHED */
1015 vg_assert(0);
1017 bad:
1018 eclasses[0] = ECLASS_MISC;
1019 return 1;
1021 # undef SWAP
/* Add tteno to the set of entries listed for equivalence class ec in
   this sector.  Returns used location in eclass array.  The per-class
   array sec->ec2tte[ec] is grown on demand. */

static
UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
{
   Int old_sz, new_sz, i, r;
   UShort *old_ar, *new_ar;

   vg_assert(ec >= 0 && ec < ECLASS_N);
   vg_assert(tteno < N_TTES_PER_SECTOR);

   if (DEBUG_TRANSTAB) VG_(printf)("ec %d gets %d\n", ec, (Int)tteno);

   /* Grow the per-class array if it is full. */
   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {

      /* 'used' can never overtake 'size'; it can only catch up. */
      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);

      old_sz = sec->ec2tte_size[ec];
      old_ar = sec->ec2tte[ec];
      /* Growth policy: start at 8 entries, double while small (<64),
         then grow by 1.5x to limit memory waste for large classes. */
      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
      new_ar = ttaux_malloc("transtab.aECN.1",
                            new_sz * sizeof(UShort));
      for (i = 0; i < old_sz; i++)
         new_ar[i] = old_ar[i];
      if (old_ar)
         ttaux_free(old_ar);
      sec->ec2tte_size[ec] = new_sz;
      sec->ec2tte[ec] = new_ar;

      if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   }

   /* Common case: append tteno at the first free slot and return
      that slot's index. */
   r = sec->ec2tte_used[ec]++;
   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   sec->ec2tte[ec][r] = tteno;
   return (UInt)r;
}
1066 /* 'vge' is being added to 'sec' at TT entry 'tteno'. Add appropriate
1067 eclass entries to 'sec'. */
1069 static
1070 void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
1072 Int i, r, eclasses[3];
1073 TTEntry* tte;
1074 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
1076 tte = &sec->tt[tteno];
1077 r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );
1079 vg_assert(r >= 1 && r <= 3);
1080 tte->n_tte2ec = r;
1082 for (i = 0; i < r; i++) {
1083 tte->tte2ec_ec[i] = eclasses[i];
1084 tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive.  Verifies
   both directions of the TT <-> eclass linkage: every eclass entry
   must point at an in-use TT entry which points back, and vice
   versa. */

static Bool sanity_check_eclasses_in_sector ( const Sector* sec )
{
   /* Fail-with-reason helper: record the message and bail out. */
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   const HChar* whassup = NULL;
   Int i, j, k, n, ec_num, ec_idx;
   TTEntry* tte;
   UShort tteno;
   ULong* tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->eclasses[0 .. tte->n_eclasses-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            /* The class numbers must be stored in strictly
               ascending order. */
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntrys
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntries they point at point
      back. */

   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {

      tte = &sec->tt[i];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_eclasses nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_eclasses out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->eclass[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->ec_idx[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != i)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse: VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty: VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%lx %u\n", tte->vge.base[k], (UInt)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}
1218 /* Sanity check absolutely everything. True == check passed. */
1220 /* forwards */
1221 static Bool sanity_check_redir_tt_tc ( void );
1223 static Bool sanity_check_sector_search_order ( void )
1225 Int i, j, nListed;
1226 /* assert the array is the right size */
1227 vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order)
1228 / sizeof(sector_search_order[0])));
1229 /* Check it's of the form valid_sector_numbers ++ [-1, -1, ..] */
1230 for (i = 0; i < n_sectors; i++) {
1231 if (sector_search_order[i] < 0 || sector_search_order[i] >= n_sectors)
1232 break;
1234 nListed = i;
1235 for (/* */; i < n_sectors; i++) {
1236 if (sector_search_order[i] != -1)
1237 break;
1239 if (i != n_sectors)
1240 return False;
1241 /* Check each sector number only appears once */
1242 for (i = 0; i < n_sectors; i++) {
1243 if (sector_search_order[i] == -1)
1244 continue;
1245 for (j = i+1; j < n_sectors; j++) {
1246 if (sector_search_order[j] == sector_search_order[i])
1247 return False;
1250 /* Check that the number of listed sectors equals the number
1251 in use, by counting nListed back down. */
1252 for (i = 0; i < n_sectors; i++) {
1253 if (sectors[i].tc != NULL)
1254 nListed--;
1256 if (nListed != 0)
1257 return False;
1258 return True;
/* Sanity-check every allocated sector: run the (expensive) eclass
   consistency check on each, and require that the number of live
   (not dead) host extents in a sector equals its count of in-use TT
   entries.  Also checks the unredirected TT/TC and the sector search
   order.  Returns True iff everything passes. */
static Bool sanity_check_all_sectors ( void )
{
   Int sno;
   Bool sane;
   Sector* sec;
   for (sno = 0; sno < n_sectors; sno++) {
      Int i;
      Int nr_not_dead_hx = 0;
      Int szhxa;
      sec = &sectors[sno];
      if (sec->tc == NULL)
         continue;   /* sector never allocated; nothing to check */
      sane = sanity_check_eclasses_in_sector( sec );
      if (!sane)
         return False;
      /* Count host extents which are still live in this sector. */
      szhxa = VG_(sizeXA)(sec->host_extents);
      for (i = 0; i < szhxa; i++) {
         const HostExtent* hx = VG_(indexXA)(sec->host_extents, i);
         if (!HostExtent__is_dead (hx, sec))
            nr_not_dead_hx++;
      }
      if (nr_not_dead_hx != sec->tt_n_inuse) {
         VG_(debugLog)(0, "transtab",
                       "nr_not_dead_hx %d sanity fail (expected == in use %d)\n",
                       nr_not_dead_hx, sec->tt_n_inuse);
         return False;
      }
   }

   if ( !sanity_check_redir_tt_tc() )
      return False;
   if ( !sanity_check_sector_search_order() )
      return False;
   return True;
}
1299 /*-------------------------------------------------------------*/
1300 /*--- Add/find translations ---*/
1301 /*-------------------------------------------------------------*/
1303 static UInt vge_osize ( const VexGuestExtents* vge )
1305 UInt i, n = 0;
1306 for (i = 0; i < vge->n_used; i++)
1307 n += (UInt)vge->len[i];
1308 return n;
1311 static Bool isValidSector ( Int sector )
1313 if (sector < 0 || sector >= n_sectors)
1314 return False;
1315 return True;
1318 static inline UInt HASH_TT ( Addr key )
1320 UInt kHi = sizeof(Addr) == 4 ? 0 : (key >> 32);
1321 UInt kLo = (UInt)key;
1322 UInt k32 = kHi ^ kLo;
1323 UInt ror = 7;
1324 if (ror > 0)
1325 k32 = (k32 >> ror) | (k32 << (32-ror));
1326 return k32 % N_TTES_PER_SECTOR;
/* Record the mapping (key -> tcptr) in the direct-mapped fast
   translation cache VG_(tt_fast), overwriting whatever previously
   hashed to the same slot. */
static void setFastCacheEntry ( Addr key, ULong* tcptr )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[cno].guest = key;
   VG_(tt_fast)[cno].host = (Addr)tcptr;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
}
1341 /* Invalidate the fast cache VG_(tt_fast). */
1342 static void invalidateFastCache ( void )
1344 UInt j;
1345 /* This loop is popular enough to make it worth unrolling a
1346 bit, at least on ppc32. */
1347 vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
1348 for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
1349 VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
1350 VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
1351 VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
1352 VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
1355 vg_assert(j == VG_TT_FAST_SIZE);
1356 n_fast_flushes++;
/* Make sector sno ready to receive translations.  If the sector has
   never been used, allocate its tt and tc arrays and register it in
   sector_search_order[]; if it is being recycled, unchain and
   discard every existing translation (notifying the tool), and free
   its eclass structures.  Either way the sector ends up empty, and
   the fast cache is invalidated. */
static void initialiseSector ( Int sno )
{
   Int i;
   SysRes sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (i = 0; i < ECLASS_N; i++) {
         vg_assert(sec->ec2tte_size[i] == 0);
         vg_assert(sec->ec2tte_used[i] == 0);
         vg_assert(sec->ec2tte[i] == NULL);
      }
      vg_assert(sec->host_extents == NULL);

      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
         VG_(dmsg)("transtab: " "allocate sector %d\n", sno);

      /* tc holds 8 bytes per ULong slot. */
      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(Addr)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
         /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)(Addr)sr_Res(sres);

      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         sec->tt[i].status = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Set up the host_extents array. */
      sec->host_extents
         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
                      ttaux_free,
                      sizeof(HostExtent));

      /* Add an entry in the sector_search_order */
      for (i = 0; i < n_sectors; i++) {
         if (sector_search_order[i] == -1)
            break;
      }
      vg_assert(i >= 0 && i < n_sectors);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
         VG_(dmsg)("transtab: " "recycle sector %d\n", sno);
      n_sectors_recycled++;

      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      /* Needed below so unchaining can patch host code. */
      VexArch arch_host = VexArch_INVALID;
      VexArchInfo archinfo_host;
      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
      VexEndness endness_host = archinfo_host.endness;

      /* Visit each just-about-to-be-abandoned translation. */
      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n",
                                      sno);
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sec->tt[i].status == InUse) {
            vg_assert(sec->tt[i].n_tte2ec >= 1);
            vg_assert(sec->tt[i].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[i].vge);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->tt[i].entry,
                              sec->tt[i].vge );
            }
            unchain_in_preparation_for_deletion(arch_host,
                                                endness_host, sno, i);
         } else {
            vg_assert(sec->tt[i].n_tte2ec == 0);
         }
         sec->tt[i].status = Empty;
         sec->tt[i].n_tte2ec = 0;
      }
      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n",
                                      sno);

      /* Free up the eclass structures. */
      for (i = 0; i < ECLASS_N; i++) {
         if (sec->ec2tte_size[i] == 0) {
            vg_assert(sec->ec2tte_used[i] == 0);
            vg_assert(sec->ec2tte[i] == NULL);
         } else {
            vg_assert(sec->ec2tte[i] != NULL);
            ttaux_free(sec->ec2tte[i]);
            sec->ec2tte[i] = NULL;
            sec->ec2tte_size[i] = 0;
            sec->ec2tte_used[i] = 0;
         }
      }

      /* Empty out the host extents array. */
      vg_assert(sec->host_extents != NULL);
      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      for (i = 0; i < n_sectors; i++) {
         if (sector_search_order[i] == sno)
            break;
      }
      vg_assert(i >= 0 && i < n_sectors);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   /* In both cases the sector is now empty. */
   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}
/* Add a translation of vge to TT/TC.  The translation is temporarily
   in code[0 .. code_len-1].

   pre: youngest_sector points to a valid (although possibly full)
   sector.

   'entry' is the guest address the translation starts at and becomes
   the lookup key.  'is_self_checking' only affects statistics
   counters here.  'offs_profInc' is the offset within code[] of the
   profile-counter instruction to patch, or -1 if there is none.
   'n_guest_instrs' feeds the entry's profiling weight (zero is
   treated as one). */
void VG_(add_to_transtab)( const VexGuestExtents* vge,
                           Addr entry,
                           Addr code,
                           UInt code_len,
                           Bool is_self_checking,
                           Int offs_profInc,
                           UInt n_guest_instrs )
{
   Int tcAvailQ, reqdQ, y, i;
   ULong *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   /* Generally stay sane */
   vg_assert(n_guest_instrs < 200); /* it can be zero, tho */

   if (DEBUG_TRANSTAB)
      VG_(printf)("add_to_transtab(entry = 0x%lx, len = %u) ...\n",
                  entry, code_len);

   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector. */
   /* Space is carved from tc in 8-byte (ULong) quanta. */
   reqdQ = (code_len + 7) >> 3;

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      Int tt_loading_pct = (100 * sectors[y].tt_n_inuse)
                           / N_TTES_PER_SECTOR;
      Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ))
                           / tc_sector_szQ;
      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) {
         VG_(dmsg)("transtab: "
                   "declare sector %d full "
                   "(TT loading %2d%%, TC loading %2d%%, avg tce size %d)\n",
                   y, tt_loading_pct, tc_loading_pct,
                   8 * (tc_sector_szQ - tcAvailQ)/sectors[y].tt_n_inuse);
      }
      youngest_sector++;
      if (youngest_sector >= n_sectors)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   VG_(memcpy)(dstP, srcP, code_len);
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full.  Linear probe
      from the hash of 'entry'. */
   i = HASH_TT(entry);
   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   while (True) {
      if (sectors[y].tt[i].status == Empty
          || sectors[y].tt[i].status == Deleted)
         break;
      i++;
      if (i >= N_TTES_PER_SECTOR)
         i = 0;
   }

   TTEntry__init(&sectors[y].tt[i]);
   sectors[y].tt[i].status = InUse;
   sectors[y].tt[i].tcptr = tcptr;
   sectors[y].tt[i].count = 0;
   sectors[y].tt[i].weight = n_guest_instrs == 0 ? 1 : n_guest_instrs;
   sectors[y].tt[i].vge = *vge;
   sectors[y].tt[i].entry = entry;

   /* Patch in the profile counter location, if necessary. */
   if (offs_profInc != -1) {
      vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
      VexArch arch_host = VexArch_INVALID;
      VexArchInfo archinfo_host;
      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
      VexEndness endness_host = archinfo_host.endness;
      VexInvalRange vir
         = LibVEX_PatchProfInc( arch_host, endness_host,
                                dstP + offs_profInc,
                                &sectors[y].tt[i].count );
      VG_(invalidate_icache)( (void*)vir.start, vir.len );
   }

   VG_(invalidate_icache)( dstP, code_len );

   /* Add this entry to the host_extents map, checking that we're
      adding in order. */
   { HostExtent hx;
     hx.start = (UChar*)tcptr;
     hx.len = code_len;
     hx.tteNo = i;
     vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
     XArray* hx_array = sectors[y].host_extents;
     vg_assert(hx_array);
     Word n = VG_(sizeXA)(hx_array);
     if (n > 0) {
        /* New extent must start after the previous one ends. */
        HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
        vg_assert(hx_prev->start + hx_prev->len <= hx.start);
     }
     VG_(addToXA)(hx_array, &hx);
     if (DEBUG_TRANSTAB)
        VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n",
                    hx.start, hx.len, y, i);
   }

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], i );
}
/* Search for the translation of the given guest address.  If
   requested, a successful search can also cause the fast-caches to be
   updated.  On success, *res_hcode, *res_sNo and *res_tteNo (each
   optional -- pass NULL to skip) receive the host code address, the
   sector number and the TT entry index.  Returns True iff a
   translation was found.
*/
Bool VG_(search_transtab) ( /*OUT*/Addr* res_hcode,
                            /*OUT*/UInt* res_sNo,
                            /*OUT*/UInt* res_tteNo,
                            Addr guest_addr,
                            Bool upd_cache )
{
   Int i, j, k, kstart, sno;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors and avoids multiple expensive % operations. */
   n_full_lookups++;
   k = -1;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);

   /* Search in all the sectors, using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < n_sectors; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      /* Linear probe from the hash point within this sector. */
      k = kstart;
      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         if (sectors[sno].tt[k].status == InUse
             && sectors[sno].tt[k].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].tt[k].tcptr );
            if (res_hcode)
               *res_hcode = (Addr)sectors[sno].tt[k].tcptr;
            if (res_sNo)
               *res_sNo = sno;
            if (res_tteNo)
               *res_tteNo = k;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         /* An Empty slot terminates this sector's probe chain. */
         if (sectors[sno].tt[k].status == Empty)
            break; /* not found in this sector */
         k++;
         if (k == N_TTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}
1750 /*-------------------------------------------------------------*/
1751 /*--- Delete translations. ---*/
1752 /*-------------------------------------------------------------*/
1754 /* forward */
1755 static void unredir_discard_translations( Addr, ULong );
1757 /* Stuff for deleting translations which intersect with a given
1758 address range. Unfortunately, to make this run at a reasonable
1759 speed, it is complex. */
1761 static inline
1762 Bool overlap1 ( Addr s1, ULong r1, Addr s2, ULong r2 )
1764 Addr e1 = s1 + r1 - 1;
1765 Addr e2 = s2 + r2 - 1;
1766 if (e1 < s2 || e2 < s1)
1767 return False;
1768 return True;
1771 static inline
1772 Bool overlaps ( Addr start, ULong range, const VexGuestExtents* vge )
1774 if (overlap1(start, range, vge->base[0], vge->len[0]))
1775 return True;
1776 if (vge->n_used < 2)
1777 return False;
1778 if (overlap1(start, range, vge->base[1], vge->len[1]))
1779 return True;
1780 if (vge->n_used < 3)
1781 return False;
1782 if (overlap1(start, range, vge->base[2], vge->len[2]))
1783 return True;
1784 return False;
/* Delete a tt entry, and update all the eclass data accordingly.
   Only the TT entry and its eclass back-links are updated; the host
   code already emitted into TC is left in place. */

static void delete_tte ( /*MOD*/Sector* sec, UInt secNo, Int tteno,
                         VexArch arch_host, VexEndness endness_host )
{
   Int i, ec_num, ec_idx;
   TTEntry* tte;

   /* sec and secNo are mutually redundant; cross-check. */
   vg_assert(sec == &sectors[secNo]);

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   /* Unchain .. */
   unchain_in_preparation_for_deletion(arch_host, endness_host, secNo, tteno);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = (Int)tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry. */
   tte->status = Deleted;
   tte->n_tte2ec = 0;

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VG_TDICT_CALL( tool_discard_superblock_info,
                     tte->entry,
                     tte->vge );
   }
}
/* Delete translations from sec which intersect specified range, but
   only consider translations in the specified eclass.  Returns True
   iff at least one translation was deleted. */

static
Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, UInt secNo,
                                            Addr guest_start, ULong range,
                                            Int ec,
                                            VexArch arch_host,
                                            VexEndness endness_host )
{
   Int i;
   UShort tteno;
   Bool anyDeld = False;
   TTEntry* tte;

   vg_assert(ec >= 0 && ec < ECLASS_N);

   /* Walk only the TT entries listed for this eclass. */
   for (i = 0; i < sec->ec2tte_used[ec]; i++) {

      tteno = sec->ec2tte[ec][i];
      if (tteno == EC2TTE_DELETED) {
         /* already deleted */
         continue;
      }

      vg_assert(tteno < N_TTES_PER_SECTOR);

      tte = &sec->tt[tteno];
      vg_assert(tte->status == InUse);

      if (overlaps( guest_start, range, &tte->vge )) {
         anyDeld = True;
         delete_tte( sec, secNo, (Int)tteno, arch_host, endness_host );
      }

   }

   return anyDeld;
}
1879 /* Delete translations from sec which intersect specified range, the
1880 slow way, by inspecting all translations in sec. */
1882 static
1883 Bool delete_translations_in_sector ( /*MOD*/Sector* sec, UInt secNo,
1884 Addr guest_start, ULong range,
1885 VexArch arch_host,
1886 VexEndness endness_host )
1888 Int i;
1889 Bool anyDeld = False;
1891 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1892 if (sec->tt[i].status == InUse
1893 && overlaps( guest_start, range, &sec->tt[i].vge )) {
1894 anyDeld = True;
1895 delete_tte( sec, secNo, i, arch_host, endness_host );
1899 return anyDeld;
1903 void VG_(discard_translations) ( Addr guest_start, ULong range,
1904 const HChar* who )
1906 Sector* sec;
1907 Int sno, ec;
1908 Bool anyDeleted = False;
1910 vg_assert(init_done);
1912 VG_(debugLog)(2, "transtab",
1913 "discard_translations(0x%lx, %llu) req by %s\n",
1914 guest_start, range, who );
1916 /* Pre-deletion sanity check */
1917 if (VG_(clo_sanity_level >= 4)) {
1918 Bool sane = sanity_check_all_sectors();
1919 vg_assert(sane);
1922 if (range == 0)
1923 return;
1925 VexArch arch_host = VexArch_INVALID;
1926 VexArchInfo archinfo_host;
1927 VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
1928 VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
1929 VexEndness endness_host = archinfo_host.endness;
1931 /* There are two different ways to do this.
1933 If the range fits within a single address-range equivalence
1934 class, as will be the case for a cache line sized invalidation,
1935 then we only have to inspect the set of translations listed in
1936 that equivalence class, and also in the "sin-bin" equivalence
1937 class ECLASS_MISC.
1939 Otherwise, the invalidation is of a larger range and probably
1940 results from munmap. In this case it's (probably!) faster just
1941 to inspect all translations, dump those we don't want, and
1942 regenerate the equivalence class information (since modifying it
1943 in-situ is even more expensive).
1946 /* First off, figure out if the range falls within a single class,
1947 and if so which one. */
1949 ec = ECLASS_MISC;
1950 if (range < (1ULL << ECLASS_SHIFT))
1951 ec = range_to_eclass( guest_start, (UInt)range );
1953 /* if ec is ECLASS_MISC then we aren't looking at just a single
1954 class, so use the slow scheme. Else use the fast scheme,
1955 examining 'ec' and ECLASS_MISC. */
1957 if (ec != ECLASS_MISC) {
1959 VG_(debugLog)(2, "transtab",
1960 " FAST, ec = %d\n", ec);
1962 /* Fast scheme */
1963 vg_assert(ec >= 0 && ec < ECLASS_MISC);
1965 for (sno = 0; sno < n_sectors; sno++) {
1966 sec = &sectors[sno];
1967 if (sec->tc == NULL)
1968 continue;
1969 anyDeleted |= delete_translations_in_sector_eclass(
1970 sec, sno, guest_start, range, ec,
1971 arch_host, endness_host
1973 anyDeleted |= delete_translations_in_sector_eclass(
1974 sec, sno, guest_start, range, ECLASS_MISC,
1975 arch_host, endness_host
1979 } else {
1981 /* slow scheme */
1983 VG_(debugLog)(2, "transtab",
1984 " SLOW, ec = %d\n", ec);
1986 for (sno = 0; sno < n_sectors; sno++) {
1987 sec = &sectors[sno];
1988 if (sec->tc == NULL)
1989 continue;
1990 anyDeleted |= delete_translations_in_sector(
1991 sec, sno, guest_start, range,
1992 arch_host, endness_host
1998 if (anyDeleted)
1999 invalidateFastCache();
2001 /* don't forget the no-redir cache */
2002 unredir_discard_translations( guest_start, range );
2004 /* Post-deletion sanity check */
2005 if (VG_(clo_sanity_level >= 4)) {
2006 Int i;
2007 TTEntry* tte;
2008 Bool sane = sanity_check_all_sectors();
2009 vg_assert(sane);
2010 /* But now, also check the requested address range isn't
2011 present anywhere. */
2012 for (sno = 0; sno < n_sectors; sno++) {
2013 sec = &sectors[sno];
2014 if (sec->tc == NULL)
2015 continue;
2016 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2017 tte = &sec->tt[i];
2018 if (tte->status != InUse)
2019 continue;
2020 vg_assert(!overlaps( guest_start, range, &tte->vge ));
/* Whether or not tools may discard translations. */
Bool VG_(ok_to_discard_translations) = False;

/* This function is exported to tools which can use it to discard
   translations, provided it is safe to do so.  Asserts that the gate
   above has been opened before forwarding to
   VG_(discard_translations). */
void VG_(discard_translations_safely) ( Addr start, SizeT len,
                                        const HChar* who )
{
   vg_assert(VG_(ok_to_discard_translations));
   VG_(discard_translations)(start, len, who);
}
2038 /*------------------------------------------------------------*/
2039 /*--- AUXILIARY: the unredirected TT/TC ---*/
2040 /*------------------------------------------------------------*/
2042 /* A very simple translation cache which holds a small number of
2043 unredirected translations. This is completely independent of the
2044 main tt/tc structures. When unredir_tc or unredir_tt becomes full,
2045 both structures are simply dumped and we start over.
2047 Since these translations are unredirected, the search key is (by
2048 definition) the first address entry in the .vge field. */
/* Sized to hold 500 translations of average size 1000 bytes. */

#define UNREDIR_SZB 1000

#define N_UNREDIR_TT 500
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))

typedef
   struct {
      VexGuestExtents vge; /* guest range(s); base[0] is the search key */
      Addr hcode;          /* host code address within unredir_tc */
      Bool inUse;          /* is this slot currently occupied? */
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
static ULong *unredir_tc;
/* NOTE(review): starts at the maximum, i.e. apparently "full" --
   presumably so nothing is added before init_unredir_tt_tc resets
   it; confirm against callers. */
static Int unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int unredir_tt_highwater;
2078 static void init_unredir_tt_tc ( void )
2080 Int i;
2081 if (unredir_tc == NULL) {
2082 SysRes sres = VG_(am_mmap_anon_float_valgrind)
2083 ( N_UNREDIR_TT * UNREDIR_SZB );
2084 if (sr_isError(sres)) {
2085 VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
2086 N_UNREDIR_TT * UNREDIR_SZB);
2087 /*NOTREACHED*/
2089 unredir_tc = (ULong *)(Addr)sr_Res(sres);
2091 unredir_tc_used = 0;
2092 for (i = 0; i < N_UNREDIR_TT; i++)
2093 unredir_tt[i].inUse = False;
2094 unredir_tt_highwater = -1;
2097 /* Do a sanity check; return False on failure. */
2098 static Bool sanity_check_redir_tt_tc ( void )
2100 Int i;
2101 if (unredir_tt_highwater < -1) return False;
2102 if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
2104 for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
2105 if (unredir_tt[i].inUse)
2106 return False;
2108 if (unredir_tc_used < 0) return False;
2109 if (unredir_tc_used > N_UNREDIR_TCQ) return False;
2111 return True;
2115 /* Add an UNREDIRECTED translation of vge to TT/TC. The translation
2116 is temporarily in code[0 .. code_len-1].
2118 void VG_(add_to_unredir_transtab)( const VexGuestExtents* vge,
2119 Addr entry,
2120 Addr code,
2121 UInt code_len )
2123 Int i, j, code_szQ;
2124 HChar *srcP, *dstP;
2126 vg_assert(sanity_check_redir_tt_tc());
2128 /* This is the whole point: it's not redirected! */
2129 vg_assert(entry == vge->base[0]);
2131 /* How many unredir_tt slots are needed */
2132 code_szQ = (code_len + 7) / 8;
2134 /* Look for an empty unredir_tc slot */
2135 for (i = 0; i < N_UNREDIR_TT; i++)
2136 if (!unredir_tt[i].inUse)
2137 break;
2139 if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
2140 /* It's full; dump everything we currently have */
2141 init_unredir_tt_tc();
2142 i = 0;
2145 vg_assert(unredir_tc_used >= 0);
2146 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2147 vg_assert(code_szQ > 0);
2148 vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
2149 vg_assert(i >= 0 && i < N_UNREDIR_TT);
2150 vg_assert(unredir_tt[i].inUse == False);
2152 if (i > unredir_tt_highwater)
2153 unredir_tt_highwater = i;
2155 dstP = (HChar*)&unredir_tc[unredir_tc_used];
2156 srcP = (HChar*)code;
2157 for (j = 0; j < code_len; j++)
2158 dstP[j] = srcP[j];
2160 VG_(invalidate_icache)( dstP, code_len );
2162 unredir_tt[i].inUse = True;
2163 unredir_tt[i].vge = *vge;
2164 unredir_tt[i].hcode = (Addr)dstP;
2166 unredir_tc_used += code_szQ;
2167 vg_assert(unredir_tc_used >= 0);
2168 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2170 vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
2173 Bool VG_(search_unredir_transtab) ( /*OUT*/Addr* result,
2174 Addr guest_addr )
2176 Int i;
2177 for (i = 0; i < N_UNREDIR_TT; i++) {
2178 if (!unredir_tt[i].inUse)
2179 continue;
2180 if (unredir_tt[i].vge.base[0] == guest_addr) {
2181 *result = unredir_tt[i].hcode;
2182 return True;
2185 return False;
2188 static void unredir_discard_translations( Addr guest_start, ULong range )
2190 Int i;
2192 vg_assert(sanity_check_redir_tt_tc());
2194 for (i = 0; i <= unredir_tt_highwater; i++) {
2195 if (unredir_tt[i].inUse
2196 && overlaps( guest_start, range, &unredir_tt[i].vge))
2197 unredir_tt[i].inUse = False;
2202 /*------------------------------------------------------------*/
2203 /*--- Initialisation. ---*/
2204 /*------------------------------------------------------------*/
2206 void VG_(init_tt_tc) ( void )
2208 Int i, avg_codeszQ;
2210 vg_assert(!init_done);
2211 init_done = True;
2213 /* Otherwise lots of things go wrong... */
2214 vg_assert(sizeof(ULong) == 8);
2215 /* check fast cache entries really are 2 words long */
2216 vg_assert(sizeof(Addr) == sizeof(void*));
2217 vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
2218 /* check fast cache entries are packed back-to-back with no spaces */
2219 vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
2220 /* check fast cache is aligned as we requested. Not fatal if it
2221 isn't, but we might as well make sure. */
2222 vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));
2224 if (VG_(clo_verbosity) > 2)
2225 VG_(message)(Vg_DebugMsg,
2226 "TT/TC: VG_(init_tt_tc) "
2227 "(startup of code management)\n");
2229 /* Figure out how big each tc area should be. */
2230 if (VG_(clo_avg_transtab_entry_size) == 0)
2231 avg_codeszQ = (VG_(details).avg_translation_sizeB + 7) / 8;
2232 else
2233 avg_codeszQ = (VG_(clo_avg_transtab_entry_size) + 7) / 8;
2234 tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);
2236 /* Ensure the calculated value is not way crazy. */
2237 vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
2238 vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR_USABLE);
2240 n_sectors = VG_(clo_num_transtab_sectors);
2241 vg_assert(n_sectors >= MIN_N_SECTORS);
2242 vg_assert(n_sectors <= MAX_N_SECTORS);
2244 /* Initialise the sectors, even the ones we aren't going to use.
2245 Set all fields to zero. */
2246 youngest_sector = 0;
2247 for (i = 0; i < MAX_N_SECTORS; i++)
2248 VG_(memset)(&sectors[i], 0, sizeof(sectors[i]));
2250 /* Initialise the sector_search_order hint table, including the
2251 entries we aren't going to use. */
2252 for (i = 0; i < MAX_N_SECTORS; i++)
2253 sector_search_order[i] = -1;
2255 /* Initialise the fast cache. */
2256 invalidateFastCache();
2258 /* and the unredir tt/tc */
2259 init_unredir_tt_tc();
2261 if (VG_(clo_verbosity) > 2 || VG_(clo_stats)
2262 || VG_(debugLog_getLevel) () >= 2) {
2263 VG_(message)(Vg_DebugMsg,
2264 "TT/TC: cache: %s--avg-transtab-entry-size=%d, "
2265 "%stool provided default %d\n",
2266 VG_(clo_avg_transtab_entry_size) == 0 ? "ignoring " : "using ",
2267 VG_(clo_avg_transtab_entry_size),
2268 VG_(clo_avg_transtab_entry_size) == 0 ? "using " : "ignoring ",
2269 VG_(details).avg_translation_sizeB);
2270 VG_(message)(Vg_DebugMsg,
2271 "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
2272 n_sectors, 8 * tc_sector_szQ,
2273 n_sectors * 8 * tc_sector_szQ );
2274 VG_(message)(Vg_DebugMsg,
2275 "TT/TC: table: %d tables of %d bytes each = %d total\n",
2276 n_sectors, (int)(N_TTES_PER_SECTOR * sizeof(TTEntry)),
2277 (int)(n_sectors * N_TTES_PER_SECTOR * sizeof(TTEntry)));
2278 VG_(message)(Vg_DebugMsg,
2279 "TT/TC: table: %d entries each = %d total entries"
2280 " max occupancy %d (%d%%)\n",
2281 N_TTES_PER_SECTOR,
2282 n_sectors * N_TTES_PER_SECTOR,
2283 n_sectors * N_TTES_PER_SECTOR_USABLE,
2284 SECTOR_TT_LIMIT_PERCENT );
2289 /*------------------------------------------------------------*/
2290 /*--- Printing out statistics. ---*/
2291 /*------------------------------------------------------------*/
2293 static ULong safe_idiv( ULong a, ULong b )
2295 return (b == 0 ? 0 : a / b);
2298 UInt VG_(get_bbs_translated) ( void )
2300 return n_in_count;
2303 void VG_(print_tt_tc_stats) ( void )
2305 VG_(message)(Vg_DebugMsg,
2306 " tt/tc: %'llu tt lookups requiring %'llu probes\n",
2307 n_full_lookups, n_lookup_probes );
2308 VG_(message)(Vg_DebugMsg,
2309 " tt/tc: %'llu fast-cache updates, %'llu flushes\n",
2310 n_fast_updates, n_fast_flushes );
2312 VG_(message)(Vg_DebugMsg,
2313 " transtab: new %'lld "
2314 "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs] "
2315 "avg tce size %d\n",
2316 n_in_count, n_in_osize, n_in_tsize,
2317 safe_idiv(10*n_in_tsize, n_in_osize),
2318 n_in_sc_count,
2319 (int) (n_in_tsize / n_in_count));
2320 VG_(message)(Vg_DebugMsg,
2321 " transtab: dumped %'llu (%'llu -> ?" "?) "
2322 "(sectors recycled %'llu)\n",
2323 n_dump_count, n_dump_osize, n_sectors_recycled );
2324 VG_(message)(Vg_DebugMsg,
2325 " transtab: discarded %'llu (%'llu -> ?" "?)\n",
2326 n_disc_count, n_disc_osize );
2328 if (DEBUG_TRANSTAB) {
2329 Int i;
2330 VG_(printf)("\n");
2331 for (i = 0; i < ECLASS_N; i++) {
2332 VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
2333 if (i % 16 == 15)
2334 VG_(printf)("\n");
2336 VG_(printf)("\n\n");
2340 /*------------------------------------------------------------*/
2341 /*--- Printing out of profiling results. ---*/
2342 /*------------------------------------------------------------*/
2344 static ULong score ( const TTEntry* tte )
2346 return ((ULong)tte->weight) * ((ULong)tte->count);
2349 ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops )
2351 Int sno, i, r, s;
2352 ULong score_total;
2354 /* First, compute the total weighted count, and find the top N
2355 ttes. tops contains pointers to the most-used n_tops blocks, in
2356 descending order (viz, tops[0] is the highest scorer). */
2357 for (i = 0; i < n_tops; i++) {
2358 tops[i].addr = 0;
2359 tops[i].score = 0;
2362 score_total = 0;
2364 for (sno = 0; sno < n_sectors; sno++) {
2365 if (sectors[sno].tc == NULL)
2366 continue;
2367 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2368 if (sectors[sno].tt[i].status != InUse)
2369 continue;
2370 score_total += score(&sectors[sno].tt[i]);
2371 /* Find the rank for sectors[sno].tt[i]. */
2372 r = n_tops-1;
2373 while (True) {
2374 if (r == -1)
2375 break;
2376 if (tops[r].addr == 0) {
2377 r--;
2378 continue;
2380 if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
2381 r--;
2382 continue;
2384 break;
2386 r++;
2387 vg_assert(r >= 0 && r <= n_tops);
2388 /* This bb should be placed at r, and bbs above it shifted
2389 upwards one slot. */
2390 if (r < n_tops) {
2391 for (s = n_tops-1; s > r; s--)
2392 tops[s] = tops[s-1];
2393 tops[r].addr = sectors[sno].tt[i].entry;
2394 tops[r].score = score( &sectors[sno].tt[i] );
2399 /* Now zero out all the counter fields, so that we can make
2400 multiple calls here and just get the values since the last call,
2401 each time, rather than values accumulated for the whole run. */
2402 for (sno = 0; sno < n_sectors; sno++) {
2403 if (sectors[sno].tc == NULL)
2404 continue;
2405 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2406 if (sectors[sno].tt[i].status != InUse)
2407 continue;
2408 sectors[sno].tt[i].count = 0;
2412 return score_total;
2415 /*--------------------------------------------------------------------*/
2416 /*--- end ---*/
2417 /*--------------------------------------------------------------------*/