/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache.               ---*/
/*---                                                 m_transtab.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2017 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/
#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
#include "pub_core_libcbase.h"
#include "pub_core_vki.h"        // to keep pub_core_libcproc.h happy, sigh
#include "pub_core_libcproc.h"   // VG_(invalidate_icache)
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
#include "pub_core_transtab.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
#include "pub_core_xarray.h"
#include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses


#define DEBUG_TRANSTAB 0
/*-------------------------------------------------------------*/
/*--- Management of the FIFO-based translation table+cache. ---*/
/*-------------------------------------------------------------*/

/* Nr of sectors provided via command line parameter. */
UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT;
/* Nr of sectors.
   Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */
static SECno n_sectors = 0;

/* Average size of a transtab code entry. 0 means to use the tool
   provided default. */
UInt VG_(clo_avg_transtab_entry_size) = 0;
/*------------------ CONSTANTS ------------------*/
/* Number of entries in hash table of each sector.  This needs to be a
   prime number to work properly.  It must be <= 65535, so that a TTE
   index fits in a UShort, leaving room for 0xFFFF (EC2TTE_DELETED,
   HTT_DELETED) to denote 'deleted' and 0xFFFE (HTT_EMPTY) to denote
   'empty' in the hash table.
   It is strongly recommended not to change this.
   65521 is the largest prime <= 65535. */
#define N_HTTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521

#define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
#define HTT_DELETED     EC2TTE_DELETED
#define HTT_EMPTY       0xFFFE
// HTTno is the Sector->htt hash table index.  Must be the same type as TTEno.
typedef UShort HTTno;

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 65

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR \
           ((N_HTTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
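
/* Worked example (nothing normative, just the arithmetic as the
   defaults stand): with N_HTTES_PER_SECTOR = 65521 and a 65% load
   limit, N_TTES_PER_SECTOR = (65521 * 65) / 100 = 42588 translations
   per sector. */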
/* Equivalence classes for fast address range deletion.  There are 1 +
   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
   address range which does not fall cleanly within any specific bin.
   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32.
   ECLASS_N must fit in an EClassNo. */
#define ECLASS_SHIFT 13
#define ECLASS_WIDTH 9
#define ECLASS_MISC  (1 << ECLASS_WIDTH)
#define ECLASS_N     (1 + ECLASS_MISC)
STATIC_ASSERT(ECLASS_SHIFT + ECLASS_WIDTH < 32);

typedef UShort EClassNo;
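
/* Illustrative example (derived only from the constants above): bins
   are 1 << ECLASS_SHIFT = 8192 bytes wide, and the bin number is bits
   [13..21] of the address, so the bin pattern repeats every 4MB.  A
   range [0x40A000, 0x40A0FF] has both endpoints in bin
   (0x40A000 >> 13) & 0x1FF = 5, so it lands in eclass 5; a range that
   straddles a bin boundary goes to ECLASS_MISC instead. */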
/*------------------ TYPES ------------------*/

/* In edges ("to-me") in the graph created by chaining. */
typedef
   struct {
      SECno from_sNo;   /* sector number */
      TTEno from_tteNo; /* TTE number in given sector */
      UInt  from_offs: (sizeof(UInt)*8)-1;  /* code offset from TCEntry::tcptr
                                               where the patch is */
      Bool  to_fastEP:1; /* Is the patch to a fast or slow entry point? */
   }
   InEdge;


/* Out edges ("from-me") in the graph created by chaining. */
typedef
   struct {
      SECno to_sNo;    /* sector number */
      TTEno to_tteNo;  /* TTE number in given sector */
      UInt  from_offs; /* code offset in owning translation where patch is */
   }
   OutEdge;
#define N_FIXED_IN_EDGE_ARR 3
typedef
   struct {
      Bool has_var:1; /* True if var is used (then n_fixed must be 0) */
      UInt n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_IN_EDGE_ARR */
      union {
         InEdge  fixed[N_FIXED_IN_EDGE_ARR]; /* if !has_var */
         XArray* var; /* XArray* of InEdge */ /* if  has_var */
      } edges;
   }
   InEdgeArr;

#define N_FIXED_OUT_EDGE_ARR 2
typedef
   struct {
      Bool has_var:1; /* True if var is used (then n_fixed must be 0) */
      UInt n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_OUT_EDGE_ARR */
      union {
         OutEdge fixed[N_FIXED_OUT_EDGE_ARR]; /* if !has_var */
         XArray* var; /* XArray* of OutEdge */ /* if  has_var */
      } edges;
   }
   OutEdgeArr;
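
/* Note on the representation above: most translations have only a
   handful of edges, so up to N_FIXED_IN_EDGE_ARR (resp.
   N_FIXED_OUT_EDGE_ARR) of them are stored inline in |fixed|, and the
   heap-allocated XArray |var| is brought into use only when the
   inline array overflows (see InEdgeArr__add / OutEdgeArr__add
   below). */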
/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too.  These are split into hot and cold parts,
   TTEntryH and TTEntryC, so as to be more cache-friendly
   (a.k.a. "faster") when searching for translations that intersect
   with a certain guest code address range, for deletion.  In the
   worst case this can involve a sequential scan through all the hot
   parts, so they are packed as compactly as possible -- 32 bytes on a
   64-bit platform, 20 bytes on a 32-bit platform.

   Each TTEntry is logically a matching cold-and-hot pair, and in fact
   it was originally one structure.  First, the cold part.
*/
typedef
   struct {
      union {
         struct {
            /* Profiling only: the count and weight (arbitrary meaning) for
               this translation.  Weight is a property of the translation
               itself and computed once when the translation is created.
               Count is an entry count for the translation and is
               incremented by 1 every time the translation is used, if we
               are profiling. */
            ULong  count;
            UShort weight;
         } prof; // if status == InUse
         TTEno next_empty_tte; // if status != InUse
      } usage;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr entry;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same. */
      UShort   n_tte2ec;     // # tte2ec pointers (1 to 3)
      EClassNo tte2ec_ec[3]; // for each, the eclass #
      UInt     tte2ec_ix[3]; // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.

      /* Admin information for chaining.  'in_edges' is a set of the
         patch points which jump to this translation -- hence are
         predecessors in the control flow graph.  'out_edges' points
         to successors in the control flow graph -- translations to
         which this one has a patched jump.  In short these are just
         backwards and forwards edges in the graph of patched-together
         blocks.  The 'in_edges' contain slightly more info, enough
         that we can undo the chaining of each mentioned patch point.
         The 'out_edges' list exists only so that we can visit the
         'in_edges' entries of all blocks we're patched through to, in
         order to remove ourselves from them when we're deleted. */

      /* A translation can disappear for two reasons:
          1. erased (as part of the oldest sector cleanup) when the
             youngest sector is full.
          2. discarded due to calls to VG_(discard_translations).
             VG_(discard_translations) sets the status of the
             translation to 'Deleted'.
             Among others, the gdbserver discards one or more translations
             when a breakpoint is inserted or removed at an Addr,
             or when single stepping mode is enabled/disabled,
             or when a translation is instrumented for gdbserver
             (all the target jumps of this translation are
             invalidated).

         So, it is possible that the translation A to be patched
         (to obtain a patched jump from A to B) is invalidated
         after B is translated and before A is patched.
         In case a translation is erased or discarded, the patching
         cannot be done.  VG_(tt_tc_do_chaining) and
         find_TTEntry_from_hcode check that the 'from' translation
         still exists before doing the patching.

         Is it safe to erase or discard the current translation E being
         executed?  Amazingly, yes, it is safe.
         Here is the explanation:

         The translation E being executed can only be erased if a new
         translation N is being done.  A new translation is done only
         if the host addr is a not yet patched jump to another
         translation.  In such a case, the guest address of N is
         assigned to the PC in the VEX state.  Control is returned
         to the scheduler.  N will be translated.  This can erase the
         translation E (in case of sector full).  VG_(tt_tc_do_chaining)
         will not do the chaining to a not-found translation E.
         The execution will continue at the current guest PC
         (i.e. the translation N).
         => it is safe to erase the current translation being executed.

         The current translation E being executed can also be discarded
         (e.g. by gdbserver).  VG_(discard_translations) will mark
         this translation E as Deleted, but the translation itself
         is not erased.  In particular, its host code can only
         be overwritten or erased in case a new translation is done.
         A new translation will only be done if a not yet translated
         jump is to be executed.  The execution of the Deleted translation
         E will continue till a non-patched jump is encountered.
         This situation is then similar to the 'erasing' case above:
         the current translation E can be erased or overwritten, as the
         execution will continue at the new translation N. */

      /* It is possible, although very unlikely, that a block A has
         more than one patched jump to block B.  This could happen if
         (eg) A finishes "jcond B; jmp B".

         This means in turn that B's in_edges set can list A more than
         once (twice in this example).  However, each such entry must
         have a different from_offs, since a patched jump can only
         jump to one place at once (it's meaningless for it to have
         multiple destinations.)  IOW, the successor and predecessor
         edges in the graph are not uniquely determined by a
         TTEntry --> TTEntry pair, but rather by a
         (TTEntry,offset) --> TTEntry triple.

         If A has multiple edges to B then B will mention A multiple
         times in its in_edges.  To make things simpler, we then
         require that A mentions B exactly the same number of times in
         its out_edges.  Furthermore, a matching out-in pair must have
         the same offset (from_offs).  This facilitates sanity
         checking, and it facilitates establishing the invariant that
         an out_edges set may not have duplicates when using the
         equality defined by (TTEntry,offset).  Hence the out_edges
         and in_edges sets really do both have set semantics.

         eg if  A has been patched to B at offsets 42 and 87 (in A)
         then   A.out_edges = { (B,42), (B,87) }   (in any order)
         and    B.in_edges  = { (A,42), (A,87) }   (in any order)

         Hence for each node pair P->Q in the graph, there's a 1:1
         mapping between P.out_edges and Q.in_edges.
      */
      InEdgeArr  in_edges;
      OutEdgeArr out_edges;
   }
   TTEntryC;
/* And this is the hot part. */
typedef
   struct {
      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated.  Originally this was a VexGuestExtents, but that
         itself is 32 bytes on a 64-bit target, and we really want to
         squeeze in an 8-bit |status| field into the 32 byte field, so
         as to get 2 of them in a 64 byte LLC line.  Hence the
         VexGuestExtents fields are inlined, the _n_used field is
         changed to a UChar (it only ever has the values 1, 2 or 3)
         and the 8-bit status field is placed in byte 31 of the
         structure. */
      /* ORIGINALLY: VexGuestExtents vge; */
      Addr   vge_base[3];
      UShort vge_len[3];
      UChar  vge_n_used;  /* BEWARE: is UShort in VexGuestExtents */

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status : 8;
   }
   TTEntryH;
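
#if 0
/* A sketch of the size claim above, for a quick check on a 64-bit
   build (this assert is an illustration, not part of the original
   code; it assumes Addr is 8 bytes there): 3*8 (vge_base) + 3*2
   (vge_len) + 1 (vge_n_used) + 1 (status) = 32 bytes, so two TTEntryH
   fit per 64-byte cache line. */
STATIC_ASSERT(sizeof(TTEntryH) == 32);
#endif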
/* Impedance matchers, that convert between a VexGuestExtents and a
   TTEntryH, ignoring TTEntryH::status, which doesn't exist in a
   VexGuestExtents -- it is entirely unrelated. */

/* Takes a VexGuestExtents and pushes it into a TTEntryH.  The
   TTEntryH.status field is left unchanged. */
static
inline void TTEntryH__from_VexGuestExtents ( /*MOD*/TTEntryH* tteH,
                                             const VexGuestExtents* vge )
{
   tteH->vge_base[0] = vge->base[0];
   tteH->vge_base[1] = vge->base[1];
   tteH->vge_base[2] = vge->base[2];
   tteH->vge_len[0]  = vge->len[0];
   tteH->vge_len[1]  = vge->len[1];
   tteH->vge_len[2]  = vge->len[2];
   tteH->vge_n_used  = (UChar)vge->n_used; /* BEWARE: no range check. */
}
/* Takes a TTEntryH and pushes the vge_ components into a VexGuestExtents. */
static
inline void TTEntryH__to_VexGuestExtents ( /*MOD*/VexGuestExtents* vge,
                                           const TTEntryH* tteH )
{
   vge->base[0] = tteH->vge_base[0];
   vge->base[1] = tteH->vge_base[1];
   vge->base[2] = tteH->vge_base[2];
   vge->len[0]  = tteH->vge_len[0];
   vge->len[1]  = tteH->vge_len[1];
   vge->len[2]  = tteH->vge_len[2];
   vge->n_used  = (UShort)tteH->vge_n_used;
}
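
#if 0
/* Illustrative round trip (a sketch, not part of the original code):
   pushing a VexGuestExtents into a TTEntryH and pulling it back out
   reproduces the original, field for field; only TTEntryH.status
   carries extra information, and it is untouched by both helpers. */
static void demo_tteH_round_trip ( void )
{
   VexGuestExtents vge_in  = { .base = {0x1000, 0, 0},
                               .len  = {12, 0, 0},
                               .n_used = 1 };
   VexGuestExtents vge_out;
   TTEntryH tteH;
   TTEntryH__from_VexGuestExtents(&tteH, &vge_in);
   TTEntryH__to_VexGuestExtents(&vge_out, &tteH);
   /* Now vge_out.base[0] == 0x1000, vge_out.len[0] == 12, n_used == 1. */
}
#endif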
/* A structure used for mapping host code addresses back to the
   relevant TTEntry.  Used when doing chaining, for finding the
   TTEntry to which some arbitrary patch address belongs. */
typedef
   struct {
      UChar* start;
      UInt   len;
      TTEno  tteNo;
   }
   HostExtent;
/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry usage.prof.count and usage.prof.weight fields, if required.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* A hash table, mapping guest address to an index in the tt array.
         htt is a fixed size, always containing
         exactly N_HTTES_PER_SECTOR entries. */
      TTEno* htt;

      /* The TTEntry{C,H} arrays.  These are a fixed size, always
         containing exactly N_TTES_PER_SECTOR entries. */
      TTEntryC* ttC;
      TTEntryH* ttH;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* A list of Empty/Deleted entries, chained by tte->next_empty_tte */
      TTEno empty_tt_list;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int    ec2tte_size[ECLASS_N];
      Int    ec2tte_used[ECLASS_N];
      TTEno* ec2tte[ECLASS_N];

      /* The host extents.  The [start, +len) ranges are constructed
         in strictly non-overlapping order, so we can binary search
         them at any time. */
      XArray* host_extents; /* XArray* of HostExtent */
   }
   Sector;
/*------------------ DECLS ------------------*/

/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all N_TC_SECTORS have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, the youngest sector should be >= 0 and <
   N_TC_SECTORS.  The initial value indicates the TT/TC system is
   not yet initialised.
*/
static Sector sectors[MAX_N_SECTORS];
static Int    youngest_sector = INV_SNO;

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int tc_sector_szQ = 0;


/* A list of sector numbers, in the order which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  INV_SNO denotes "no entry". */
static SECno sector_search_order[MAX_N_SECTORS];
/* Fast helper for the TC.  A 4-way set-associative cache, with more-or-less
   LRU replacement.  It holds a set of recently used (guest address, host
   address) pairs.  This array is referred to directly from
   m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address -- TRANSTAB_BOGUS_GUEST_ADDR -- must be
   pointed at to cause that cache entry to miss.  This relies on the
   assumption that no guest code actually has that address, hence a
   value 0x1 seems good.  m_translate gives the client a synthetic
   segfault if it tries to execute at this address.
*/
typedef
   struct {
      Addr guest0;
      Addr host0;
      Addr guest1;
      Addr host1;
      Addr guest2;
      Addr host2;
      Addr guest3;
      Addr host3;
   }
   FastCacheSet;

/*global*/ __attribute__((aligned(64)))
FastCacheSet VG_(tt_fast)[VG_TT_FAST_SETS];

/* Make sure we're not used before initialisation. */
static Bool init_done = False;
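
#if 0
/* Not the real lookup -- that lives in handwritten assembly in
   m_dispatch/dispatch-<platform>.S -- but a C sketch of what a probe
   of VG_(tt_fast) amounts to: hash the guest address to a set,
   compare the four tags, and return the host address on a hit. */
static inline Addr sketch_tt_fast_probe ( Addr guest )
{
   FastCacheSet* set = &VG_(tt_fast)[VG_TT_FAST_HASH(guest)];
   if (LIKELY(set->guest0 == guest)) return set->host0;
   if (set->guest1 == guest) return set->host1;
   if (set->guest2 == guest) return set->host2;
   if (set->guest3 == guest) return set->host3;
   return 0; /* miss: caller falls back to a full VG_(search_transtab) */
}
#endif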
/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
static ULong n_fast_flushes = 0;
static ULong n_fast_updates = 0;

/* Number of full lookups done. */
static ULong n_full_lookups  = 0;
static ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
static ULong n_in_count    = 0;
static ULong n_in_osize    = 0;
static ULong n_in_tsize    = 0;
static ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
static ULong n_dump_count = 0;
static ULong n_dump_osize = 0;
static ULong n_sectors_recycled = 0;

/* Number/osize of translations discarded due to requests to do so. */
static ULong n_disc_count = 0;
static ULong n_disc_osize = 0;
/*-------------------------------------------------------------*/
/*--- Misc                                                  ---*/
/*-------------------------------------------------------------*/

static void* ttaux_malloc ( const HChar* tag, SizeT n )
{
   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
}

static void ttaux_free ( void* p )
{
   VG_(arena_free)(VG_AR_TTAUX, p);
}
/*-------------------------------------------------------------*/
/*--- Chaining support                                      ---*/
/*-------------------------------------------------------------*/

static inline TTEntryC* index_tteC ( SECno sNo, TTEno tteNo )
{
   vg_assert(sNo < n_sectors);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->ttC && s->ttH);
   TTEntryC* tteC = &s->ttC[tteNo];
   TTEntryH* tteH = &s->ttH[tteNo];
   vg_assert(tteH->status == InUse);
   return tteC;
}

static inline TTEntryH* index_tteH ( SECno sNo, TTEno tteNo )
{
   vg_assert(sNo < n_sectors);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->ttH);
   TTEntryH* tteH = &s->ttH[tteNo];
   vg_assert(tteH->status == InUse);
   return tteH;
}
static void InEdge__init ( InEdge* ie )
{
   ie->from_sNo   = INV_SNO; /* invalid */
   ie->from_tteNo = 0;
   ie->from_offs  = 0;
   ie->to_fastEP  = False;
}

static void OutEdge__init ( OutEdge* oe )
{
   oe->to_sNo    = INV_SNO; /* invalid */
   oe->to_tteNo  = 0;
   oe->from_offs = 0;
}

static void TTEntryC__init ( TTEntryC* tteC )
{
   VG_(bzero_inline)(tteC, sizeof(*tteC));
}

static void TTEntryH__init ( TTEntryH* tteH )
{
   VG_(bzero_inline)(tteH, sizeof(*tteH));
}
static UWord InEdgeArr__size ( const InEdgeArr* iea )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      return VG_(sizeXA)(iea->edges.var);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      return iea->n_fixed;
   }
}

static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(deleteXA)(iea->edges.var);
      iea->edges.var = NULL;
      iea->has_var = False;
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      iea->n_fixed = 0;
   }
}

static
InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      return (InEdge*)VG_(indexXA)(iea->edges.var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      return &iea->edges.fixed[i];
   }
}

static
void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(removeIndexXA)(iea->edges.var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      for (; i+1 < iea->n_fixed; i++) {
         iea->edges.fixed[i] = iea->edges.fixed[i+1];
      }
      iea->n_fixed--;
   }
}

static
void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(addToXA)(iea->edges.var, ie);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         XArray* var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
                                  ttaux_free,
                                  sizeof(InEdge));
         VG_(hintSizeXA) (var, iea->n_fixed + 1);
         UWord i;
         for (i = 0; i < iea->n_fixed; i++) {
            VG_(addToXA)(var, &iea->edges.fixed[i]);
         }
         VG_(addToXA)(var, ie);
         iea->n_fixed = 0;
         iea->has_var = True;
         iea->edges.var = var;
      } else {
         /* Just add to the fixed array. */
         iea->edges.fixed[iea->n_fixed++] = *ie;
      }
   }
}
static UWord OutEdgeArr__size ( const OutEdgeArr* oea )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      return VG_(sizeXA)(oea->edges.var);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      return oea->n_fixed;
   }
}

static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(deleteXA)(oea->edges.var);
      oea->edges.var = NULL;
      oea->has_var = False;
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      oea->n_fixed = 0;
   }
}

static
OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      return (OutEdge*)VG_(indexXA)(oea->edges.var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      return &oea->edges.fixed[i];
   }
}

static
void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(removeIndexXA)(oea->edges.var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      for (; i+1 < oea->n_fixed; i++) {
         oea->edges.fixed[i] = oea->edges.fixed[i+1];
      }
      oea->n_fixed--;
   }
}

static
void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(addToXA)(oea->edges.var, oe);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         XArray* var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
                                  ttaux_free,
                                  sizeof(OutEdge));
         VG_(hintSizeXA) (var, oea->n_fixed+1);
         UWord i;
         for (i = 0; i < oea->n_fixed; i++) {
            VG_(addToXA)(var, &oea->edges.fixed[i]);
         }
         VG_(addToXA)(var, oe);
         oea->n_fixed = 0;
         oea->has_var = True;
         oea->edges.var = var;
      } else {
         /* Just add to the fixed array. */
         oea->edges.fixed[oea->n_fixed++] = *oe;
      }
   }
}
static
Int HostExtent__cmpOrd ( const void* v1, const void* v2 )
{
   const HostExtent* hx1 = v1;
   const HostExtent* hx2 = v2;
   if (hx1->start + hx1->len <= hx2->start) return -1;
   if (hx2->start + hx2->len <= hx1->start) return 1;
   return 0; /* partial overlap */
}
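
/* For instance (just an illustration of the ordering): the extents
   [0x100, +0x20) and [0x120, +0x10) compare as -1, since the first
   ends exactly where the second starts; any partial overlap compares
   as 0, which is what lets VG_(lookupXA_UNSAFE) with a len-1 key find
   the extent containing a given host address. */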
/* True if hx is a dead host extent, i.e. corresponds to host code
   of an entry that was invalidated. */
static
Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec)
{
   const TTEno tteNo = hx->tteNo;
#define LDEBUG(m) if (DEBUG_TRANSTAB)                           \
      VG_(printf) (m                                            \
                   " start 0x%p len %u sector %d ttslot %u"     \
                   " tt.entry 0x%lx tt.tcptr 0x%p\n",           \
                   hx->start, hx->len, (int)(sec - sectors),    \
                   hx->tteNo,                                   \
                   sec->ttC[tteNo].entry, sec->ttC[tteNo].tcptr)

   /* Entry might have been invalidated and not re-used yet. */
   if (sec->ttH[tteNo].status == Deleted) {
      LDEBUG("found deleted entry");
      return True;
   }
   /* Maybe we found this entry via a host_extents which was
      inserted for an entry which was changed to Deleted then
      re-used after.  If this entry was re-used, then its tcptr
      is >= the host_extents start (i.e. the previous tcptr) + len.
      This is the case as there is no re-use of host code: a new
      entry or re-used entry always gets "higher value" host code. */
   if ((UChar*) sec->ttC[tteNo].tcptr >= hx->start + hx->len) {
      LDEBUG("found re-used entry");
      return True;
   }

   return False;
#undef LDEBUG
}
static __attribute__((noinline))
Bool find_TTEntry_from_hcode( /*OUT*/SECno* from_sNo,
                              /*OUT*/TTEno* from_tteNo,
                              void* hcode )
{
   SECno i;

   /* Search order logic copied from VG_(search_transtab). */
   for (i = 0; i < n_sectors; i++) {
      SECno sno = sector_search_order[i];
      if (UNLIKELY(sno == INV_SNO))
         return False; /* run out of sectors to search */

      const Sector* sec = &sectors[sno];
      const XArray* /* of HostExtent */ host_extents = sec->host_extents;
      vg_assert(host_extents);

      HostExtent key;
      VG_(memset)(&key, 0, sizeof(key));
      key.start = hcode;
      key.len = 1;
      Word firstW = -1, lastW = -1;
      Bool found  = VG_(lookupXA_UNSAFE)(
                       host_extents, &key, &firstW, &lastW,
                       HostExtent__cmpOrd );
      vg_assert(firstW == lastW); // always true, even if not found
      if (found) {
         HostExtent* hx = VG_(indexXA)(host_extents, firstW);
         TTEno tteNo = hx->tteNo;
         /* Do some additional sanity checks. */
         vg_assert(tteNo < N_TTES_PER_SECTOR);

         /* If this hx entry corresponds to dead host code, we must
            report that the code has not been found, as it cannot be
            patched. */
         if (HostExtent__is_dead (hx, sec))
            return False;

         vg_assert(sec->ttH[tteNo].status == InUse);
         /* Can only half check that the found TTEntry contains hcode,
            due to not having a length value for the hcode in the
            TTEntry. */
         vg_assert((UChar*)sec->ttC[tteNo].tcptr <= (UChar*)hcode);
         /* Looks plausible */
         *from_sNo   = sno;
         *from_tteNo = tteNo;
         return True;
      }
   }
   return False;
}
/* Figure out whether or not hcode is jitted code present in the main
   code cache (but not in the no-redir cache).  Used for sanity
   checking. */
static Bool is_in_the_main_TC ( const void* hcode )
{
   SECno i, sno;
   for (i = 0; i < n_sectors; i++) {
      sno = sector_search_order[i];
      if (sno == INV_SNO)
         break; /* run out of sectors to search */
      if ((const UChar*)hcode >= (const UChar*)sectors[sno].tc
          && (const UChar*)hcode <= (const UChar*)sectors[sno].tc_next
                                    + sizeof(ULong) - 1)
         return True;
   }
   return False;
}
/* Fulfill a chaining request, and record admin info so we
   can undo it later, if required.
*/
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              SECno to_sNo,
                              TTEno to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch arch_host = VexArch_INVALID;
   VexArchInfo archinfo_host;
   VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   VexEndness endness_host = archinfo_host.endness;

   // host_code is where we're patching to.  So it needs to
   // take into account whether we're jumping to the slow
   // or fast entry point.  By definition, the fast entry point
   // is exactly one event check's worth of code along from
   // the slow (tcptr) entry point.
   TTEntryC* to_tteC = index_tteC(to_sNo, to_tteNo);
   void* host_code = ((UChar*)to_tteC->tcptr)
                     + (to_fastEP ? LibVEX_evCheckSzB(arch_host) : 0);

   // stay sane -- the patch point (dst) is in this sector's code cache
   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
                                   + sizeof(ULong) - 1 );

   /* Find the TTEntry for the from__ code.  This isn't simple since
      we only know the patch address, which is going to be somewhere
      inside the from_ block. */
   SECno from_sNo   = INV_SNO;
   TTEno from_tteNo = INV_TTE;
   Bool  from_found
      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
                                 from__patch_addr );
   if (!from_found) {
      // The from code might have been discarded due to sector re-use
      // or marked Deleted due to translation invalidation.
      // In such a case, don't do the chaining.
      VG_(debugLog)(1,"transtab",
                    "host code %p not found (discarded? sector recycled?)"
                    " => no chaining done\n",
                    from__patch_addr);
      return;
   }

   TTEntryC* from_tteC = index_tteC(from_sNo, from_tteNo);

   /* Get VEX to do the patching itself.  We have to hand it off
      since it is host-dependent. */
   VexInvalRange vir
      = LibVEX_Chain(
           arch_host, endness_host,
           from__patch_addr,
           VG_(fnptr_to_fnentry)(
              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                        : &VG_(disp_cp_chain_me_to_slowEP)),
           (void*)host_code
        );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );

   /* Now do the tricky bit -- update the ch_succs and ch_preds info
      for the two translations involved, so we can undo the chaining
      later, which we will have to do if the to_ block gets removed
      for whatever reason. */

   /* This is the new from_ -> to_ link to add. */
   InEdge ie;
   InEdge__init(&ie);
   ie.from_sNo   = from_sNo;
   ie.from_tteNo = from_tteNo;
   ie.to_fastEP  = to_fastEP;
   HWord from_offs = (HWord)( (UChar*)from__patch_addr
                              - (UChar*)from_tteC->tcptr );
   vg_assert(from_offs < 100000/* let's say */);
   ie.from_offs  = (UInt)from_offs;

   /* This is the new to_ -> from_ backlink to add. */
   OutEdge oe;
   OutEdge__init(&oe);
   oe.to_sNo    = to_sNo;
   oe.to_tteNo  = to_tteNo;
   oe.from_offs = (UInt)from_offs;

   /* Add .. */
   InEdgeArr__add(&to_tteC->in_edges, &ie);
   OutEdgeArr__add(&from_tteC->out_edges, &oe);
}
/* Unchain one patch, as described by the specified InEdge.  For
   sanity check purposes only (to check that the patched location is
   as expected) it also requires the fast and slow entry point
   addresses of the destination block (that is, the block that owns
   this InEdge). */
__attribute__((noinline))
static void unchain_one ( VexArch arch_host, VexEndness endness_host,
                          InEdge* ie,
                          void* to_fastEPaddr, void* to_slowEPaddr )
{
   vg_assert(ie);
   TTEntryC* tteC
      = index_tteC(ie->from_sNo, ie->from_tteNo);
   UChar* place_to_patch
      = ((UChar*)tteC->tcptr) + ie->from_offs;
   UChar* disp_cp_chain_me
      = VG_(fnptr_to_fnentry)(
           ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                         : &VG_(disp_cp_chain_me_to_slowEP)
        );
   UChar* place_to_jump_to_EXPECTED
      = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;

   // stay sane: both src and dst for this unchaining are
   // in the main code cache
   vg_assert( is_in_the_main_TC(place_to_patch) ); // src
   vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
   // dst check is ok because LibVEX_UnChain checks that
   // place_to_jump_to_EXPECTED really is the current dst, and
   // asserts if it isn't.
   VexInvalRange vir
      = LibVEX_UnChain( arch_host, endness_host, place_to_patch,
                        place_to_jump_to_EXPECTED, disp_cp_chain_me );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );
}
/* The specified block is about to be deleted.  Update the preds and
   succs of its associated blocks accordingly.  This includes undoing
   any chained jumps to this block. */
static
void unchain_in_preparation_for_deletion ( VexArch arch_host,
                                           VexEndness endness_host,
                                           SECno here_sNo, TTEno here_tteNo )
{
   if (DEBUG_TRANSTAB)
      VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo);
   UWord i, j, n, m;
   Int evCheckSzB = LibVEX_evCheckSzB(arch_host);
   TTEntryC* here_tteC = index_tteC(here_sNo, here_tteNo);
   TTEntryH* here_tteH = index_tteH(here_sNo, here_tteNo);
   if (DEBUG_TRANSTAB)
      VG_(printf)("... QQQ tt.entry 0x%lx tt.tcptr 0x%p\n",
                  here_tteC->entry, here_tteC->tcptr);
   vg_assert(here_tteH->status == InUse);

   /* Visit all InEdges owned by here_tte. */
   n = InEdgeArr__size(&here_tteC->in_edges);
   for (i = 0; i < n; i++) {
      InEdge* ie = InEdgeArr__index(&here_tteC->in_edges, i);
      // Undo the chaining.
      UChar* here_slow_EP = (UChar*)here_tteC->tcptr;
      UChar* here_fast_EP = here_slow_EP + evCheckSzB;
      unchain_one(arch_host, endness_host, ie, here_fast_EP, here_slow_EP);
      // Find the corresponding entry in the "from" node's out_edges,
      // and remove it.
      TTEntryC* from_tteC = index_tteC(ie->from_sNo, ie->from_tteNo);
      m = OutEdgeArr__size(&from_tteC->out_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         OutEdge* oe = OutEdgeArr__index(&from_tteC->out_edges, j);
         if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
             && oe->from_offs == ie->from_offs)
            break;
      }
      vg_assert(j < m); // "oe must be findable"
      OutEdgeArr__deleteIndex(&from_tteC->out_edges, j);
   }

   /* Visit all OutEdges owned by here_tte. */
   n = OutEdgeArr__size(&here_tteC->out_edges);
   for (i = 0; i < n; i++) {
      OutEdge* oe = OutEdgeArr__index(&here_tteC->out_edges, i);
      // Find the corresponding entry in the "to" node's in_edges,
      // and remove it.
      TTEntryC* to_tteC = index_tteC(oe->to_sNo, oe->to_tteNo);
      m = InEdgeArr__size(&to_tteC->in_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         InEdge* ie = InEdgeArr__index(&to_tteC->in_edges, j);
         if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
             && ie->from_offs == oe->from_offs)
            break;
      }
      vg_assert(j < m); // "ie must be findable"
      InEdgeArr__deleteIndex(&to_tteC->in_edges, j);
   }

   InEdgeArr__makeEmpty(&here_tteC->in_edges);
   OutEdgeArr__makeEmpty(&here_tteC->out_edges);
}
/*-------------------------------------------------------------*/
/*--- Address-range equivalence class stuff                 ---*/
/*-------------------------------------------------------------*/

/* Return equivalence class number for a range. */

static EClassNo range_to_eclass ( Addr start, UInt len )
{
   UInt mask   = (1 << ECLASS_WIDTH) - 1;
   UInt lo     = (UInt)start;
   UInt hi     = lo + len - 1;
   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   if (loBits == hiBits) {
      vg_assert(loBits < ECLASS_N-1);
      return loBits;
   } else {
      return ECLASS_MISC;
   }
}
/* Calculates the equivalence class numbers for any VexGuestExtents.
   These are written in *eclasses, which must be big enough to hold 3
   EClassNos.  The number written, between 1 and 3, is returned.  The
   eclasses are presented in order, and any duplicates are removed.
*/

static
Int vexGuestExtents_to_eclasses ( /*OUT*/EClassNo* eclasses,
                                  const TTEntryH* tteH )
{
#  define SWAP(_lv1,_lv2) \
      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)

   UInt i, j, n_ec;
   EClassNo r;

   vg_assert(tteH->vge_n_used >= 1 && tteH->vge_n_used <= 3);

   n_ec = 0;
   for (i = 0; i < tteH->vge_n_used; i++) {
      r = range_to_eclass( tteH->vge_base[i], tteH->vge_len[i] );
      if (r == ECLASS_MISC)
         goto bad;
      /* only add if we haven't already seen it */
      for (j = 0; j < n_ec; j++)
         if (eclasses[j] == r)
            break;
      if (j == n_ec)
         eclasses[n_ec++] = r;
   }

   if (n_ec == 1)
      return 1;

   if (n_ec == 2) {
      /* sort */
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      return 2;
   }

   if (n_ec == 3) {
      /* sort */
      if (eclasses[0] > eclasses[2])
         SWAP(eclasses[0], eclasses[2]);
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      if (eclasses[1] > eclasses[2])
         SWAP(eclasses[1], eclasses[2]);
      return 3;
   }

   /* NOTREACHED */
   vg_assert(0);

  bad:
   eclasses[0] = ECLASS_MISC;
   return 1;

#  undef SWAP
}
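
/* E.g. (just tracing the three exchanges above): input eclasses
   {7,3,5} goes {7,3,5} -> {5,3,7} -> {3,5,7} -> {3,5,7}; the three
   compare-and-swaps form a complete 3-element sorting network. */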
/* Add tteno to the set of entries listed for equivalence class ec in
   this sector.  Returns used location in eclass array. */

static
UInt addEClassNo ( /*MOD*/Sector* sec, EClassNo ec, TTEno tteno )
{
   Int   old_sz, new_sz, i, r;
   TTEno *old_ar, *new_ar;

   vg_assert(ec >= 0 && ec < ECLASS_N);
   vg_assert(tteno < N_TTES_PER_SECTOR);

   if (DEBUG_TRANSTAB) VG_(printf)("ec %d gets %d\n", ec, (Int)tteno);

   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {

      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);

      old_sz = sec->ec2tte_size[ec];
      old_ar = sec->ec2tte[ec];
      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
      new_ar = ttaux_malloc("transtab.aECN.1",
                            new_sz * sizeof(TTEno));
      for (i = 0; i < old_sz; i++)
         new_ar[i] = old_ar[i];
      if (old_ar)
         ttaux_free(old_ar);
      sec->ec2tte_size[ec] = new_sz;
      sec->ec2tte[ec] = new_ar;

      if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   }

   /* Common case */
   r = sec->ec2tte_used[ec]++;
   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   sec->ec2tte[ec][r] = tteno;
   return (UInt)r;
}
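
/* The growth policy above takes an ec2tte array through sizes
   8, 16, 32, 64, 96, 144, ... : doubling while small, then growing by
   1.5x to limit overshoot once the array is already big. */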
/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   eclass entries to 'sec'. */

static
void upd_eclasses_after_add ( /*MOD*/Sector* sec, TTEno tteno )
{
   Int i, r;
   EClassNo eclasses[3];
   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);

   TTEntryH* tteH = &sec->ttH[tteno];
   r = vexGuestExtents_to_eclasses( eclasses, tteH );
   vg_assert(r >= 1 && r <= 3);

   TTEntryC* tteC = &sec->ttC[tteno];
   tteC->n_tte2ec = r;
   for (i = 0; i < r; i++) {
      tteC->tte2ec_ec[i] = eclasses[i];
      tteC->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], tteno );
   }
}
/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( const Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   const HChar* whassup = NULL;
   Int      j, k, n, ec_idx;
   EClassNo i;
   EClassNo ec_num;
   TTEno    tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         TTEntryC* tteC = &sec->ttC[tteno];
         TTEntryH* tteH = &sec->ttH[tteno];
         if (tteH->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tteC->n_tte2ec < 1 || tteC->n_tte2ec > 3)
            BAD("tteC->n_tte2ec out of range");
         /* Exactly one of tte->eclasses[0 .. tte->n_eclasses-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tteC->n_tte2ec; k++) {
            if (k < tteC->n_tte2ec-1
                && tteC->tte2ec_ec[k] >= tteC->tte2ec_ec[k+1])
               BAD("tteC->tte2ec_ec[..] out of order");
            ec_num = tteC->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tteC->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tteC->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tteC->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntries
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntries they point at point
      back. */

   for (tteno = 0; tteno < N_TTES_PER_SECTOR; tteno++) {

      TTEntryC* tteC = &sec->ttC[tteno];
      TTEntryH* tteH = &sec->ttH[tteno];
      if (tteH->status == Empty || tteH->status == Deleted) {
         if (tteC->n_tte2ec != 0)
            BAD("tteC->n_tte2ec nonzero for unused tte");
         continue;
      }

      vg_assert(tteH->status == InUse);

      if (tteC->n_tte2ec < 1 || tteC->n_tte2ec > 3)
         BAD("tteC->n_tte2ec out of range(2)");

      for (j = 0; j < tteC->n_tte2ec; j++) {
         ec_num = tteC->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tteC->tte2ec_ec[..] out of range");
         ec_idx = tteC->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tteC->tte2ec_ix[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != tteno)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%lx %u\n", tte->vge.base[k], (UInt)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}
/* Sanity check absolutely everything.  True == check passed. */

/* forwards */
static Bool sanity_check_redir_tt_tc ( void );

static Bool sanity_check_sector_search_order ( void )
{
   SECno i, j, nListed;
   /* assert the array is the right size */
   vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order)
                               / sizeof(sector_search_order[0])));
   /* Check it's of the form  valid_sector_numbers ++ [INV_SNO, INV_SNO, ..] */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] == INV_SNO
          || sector_search_order[i] >= n_sectors)
         break;
   }
   nListed = i;
   for (/* */; i < n_sectors; i++) {
      if (sector_search_order[i] != INV_SNO)
         break;
   }
   if (i != n_sectors)
      return False;
   /* Check each sector number only appears once */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] == INV_SNO)
         continue;
      for (j = i+1; j < n_sectors; j++) {
         if (sector_search_order[j] == sector_search_order[i])
            return False;
      }
   }
   /* Check that the number of listed sectors equals the number
      in use, by counting nListed back down. */
   for (i = 0; i < n_sectors; i++) {
      if (sectors[i].tc != NULL)
         nListed--;
   }
   if (nListed != 0)
      return False;
   return True;
}
static Bool sanity_check_all_sectors ( void )
{
   SECno   sno;
   Bool    sane;
   Sector* sec;
   for (sno = 0; sno < n_sectors; sno++) {
      Int i;
      Int nr_not_dead_hx = 0;
      Int szhxa;
      sec = &sectors[sno];
      if (sec->tc == NULL)
         continue;
      sane = sanity_check_eclasses_in_sector( sec );
      if (!sane)
         return False;
      szhxa = VG_(sizeXA)(sec->host_extents);
      for (i = 0; i < szhxa; i++) {
         const HostExtent* hx = VG_(indexXA)(sec->host_extents, i);
         if (!HostExtent__is_dead (hx, sec))
            nr_not_dead_hx++;
      }
      if (nr_not_dead_hx != sec->tt_n_inuse) {
         VG_(debugLog)(0, "transtab",
                       "nr_not_dead_hx %d sanity fail "
                       "(expected == in use %d)\n",
                       nr_not_dead_hx, sec->tt_n_inuse);
         return False;
      }
   }

   if ( !sanity_check_redir_tt_tc() )
      return False;
   if ( !sanity_check_sector_search_order() )
      return False;
   return True;
}
/*-------------------------------------------------------------*/
/*--- Add/find translations                                 ---*/
/*-------------------------------------------------------------*/

static UInt vge_osize ( const VexGuestExtents* vge )
{
   UInt i, n = 0;
   for (i = 0; i < vge->n_used; i++)
      n += (UInt)vge->len[i];
   return n;
}

static UInt TTEntryH__osize ( const TTEntryH* tteH )
{
   UInt i, n = 0;
   for (i = 0; i < tteH->vge_n_used; i++)
      n += (UInt)tteH->vge_len[i];
   return n;
}

static Bool isValidSector ( SECno sector )
{
   if (sector == INV_SNO || sector >= n_sectors)
      return False;
   return True;
}
static inline HTTno HASH_TT ( Addr key )
{
   UInt kHi = sizeof(Addr) == 4 ? 0 : (key >> 32);
   UInt kLo = (UInt)key;
   UInt k32 = kHi ^ kLo;
   UInt ror = 7;
   if (ror > 0)
      k32 = (k32 >> ror) | (k32 << (32-ror));
   return (HTTno)(k32 % N_HTTES_PER_SECTOR);
}
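
/* Worked example (nothing but the arithmetic above): for key
   0x100403000 on a 64-bit host, kHi = 0x1 and kLo = 0x00403000, so
   k32 = 0x00403001; rotating right by 7 gives 0x02008060, and
   0x02008060 % 65521 = 40544, the starting probe point in htt. */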
/* Invalidate the fast cache VG_(tt_fast). */
static void invalidateFastCache ( void )
{
   for (UWord j = 0; j < VG_TT_FAST_SETS; j++) {
      FastCacheSet* set = &VG_(tt_fast)[j];
      set->guest0 = TRANSTAB_BOGUS_GUEST_ADDR;
      set->guest1 = TRANSTAB_BOGUS_GUEST_ADDR;
      set->guest2 = TRANSTAB_BOGUS_GUEST_ADDR;
      set->guest3 = TRANSTAB_BOGUS_GUEST_ADDR;
   }
   n_fast_flushes++;
}

/* Invalidate a single fast cache entry. */
static void invalidateFastCacheEntry ( Addr guest )
{
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(guest != TRANSTAB_BOGUS_GUEST_ADDR);
   /* If any entry in the line is the right one, just set it to
      TRANSTAB_BOGUS_GUEST_ADDR.  Doing so ensures that the entry will
      never be used in future, so will eventually fall off the end of
      the line, due to LRU replacement, and be replaced with something
      that's actually useful. */
   UWord setNo = (UInt)VG_TT_FAST_HASH(guest);
   FastCacheSet* set = &VG_(tt_fast)[setNo];
   if (set->guest0 == guest) {
      set->guest0 = TRANSTAB_BOGUS_GUEST_ADDR;
   }
   if (set->guest1 == guest) {
      set->guest1 = TRANSTAB_BOGUS_GUEST_ADDR;
   }
   if (set->guest2 == guest) {
      set->guest2 = TRANSTAB_BOGUS_GUEST_ADDR;
   }
   if (set->guest3 == guest) {
      set->guest3 = TRANSTAB_BOGUS_GUEST_ADDR;
   }
}
static void setFastCacheEntry ( Addr guest, ULong* tcptr )
{
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(guest != TRANSTAB_BOGUS_GUEST_ADDR);
   /* Shift all entries along one, so that the LRU one disappears, and put
      the new entry at the MRU position. */
   UWord setNo = (UInt)VG_TT_FAST_HASH(guest);
   FastCacheSet* set = &VG_(tt_fast)[setNo];
   set->host3  = set->host2;
   set->guest3 = set->guest2;
   set->host2  = set->host1;
   set->guest2 = set->guest1;
   set->host1  = set->host0;
   set->guest1 = set->guest0;
   set->host0  = (Addr)tcptr;
   set->guest0 = guest;
   n_fast_updates++;
}
static TTEno get_empty_tt_slot(SECno sNo)
{
   TTEno i;

   i = sectors[sNo].empty_tt_list;
   sectors[sNo].empty_tt_list = sectors[sNo].ttC[i].usage.next_empty_tte;

   vg_assert (i >= 0 && i < N_TTES_PER_SECTOR);

   return i;
}

static void add_to_empty_tt_list (SECno sNo, TTEno tteno)
{
   sectors[sNo].ttC[tteno].usage.next_empty_tte = sectors[sNo].empty_tt_list;
   sectors[sNo].empty_tt_list = tteno;
}
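
/* Together these two implement a LIFO free list of tt slots, threaded
   through usage.next_empty_tte and headed by sec->empty_tt_list.  For
   example, releasing slots 5 then 9 leaves empty_tt_list == 9 with
   ttC[9].usage.next_empty_tte == 5, and the next two allocations hand
   back 9 and then 5. */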
static void initialiseSector ( SECno sno )
{
   UInt    i;
   SysRes  sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->ttC == NULL);
      vg_assert(sec->ttH == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         vg_assert(sec->ec2tte_size[e] == 0);
         vg_assert(sec->ec2tte_used[e] == 0);
         vg_assert(sec->ec2tte[e] == NULL);
      }
      vg_assert(sec->host_extents == NULL);

      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
         VG_(dmsg)("transtab: " "allocate sector %d\n", sno);

      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(Addr)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntryC) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TTC)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntryC) );
         /*NOTREACHED*/
      }
      sec->ttC = (TTEntryC*)(Addr)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntryH) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TTH)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntryH) );
         /*NOTREACHED*/
      }
      sec->ttH = (TTEntryH*)(Addr)sr_Res(sres);

      sec->empty_tt_list = HTT_EMPTY;
      for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) {
         sec->ttH[ei].status   = Empty;
         sec->ttC[ei].n_tte2ec = 0;
         add_to_empty_tt_list(sno, ei);
      }

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_HTTES_PER_SECTOR * sizeof(TTEno) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(HTT)",
                                     N_HTTES_PER_SECTOR * sizeof(TTEno) );
         /*NOTREACHED*/
      }
      sec->htt = (TTEno*)(Addr)sr_Res(sres);
      for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++)
         sec->htt[hi] = HTT_EMPTY;

      /* Set up the host_extents array. */
      sec->host_extents
         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
                      ttaux_free,
                      sizeof(HostExtent));

      /* Add an entry in the sector_search_order */
      for (i = 0; i < n_sectors; i++) {
         if (sector_search_order[i] == INV_SNO)
            break;
      }
      vg_assert(i >= 0 && i < n_sectors);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
         VG_(dmsg)("transtab: " "recycle sector %d\n", sno);
      n_sectors_recycled++;

      vg_assert(sec->ttC != NULL);
      vg_assert(sec->ttH != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      VexArch     arch_host = VexArch_INVALID;
      VexArchInfo archinfo_host;
      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
      VexEndness endness_host = archinfo_host.endness;

      /* Visit each just-about-to-be-abandoned translation. */
      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n",
                                      sno);
      sec->empty_tt_list = HTT_EMPTY;
      for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) {
         if (sec->ttH[ei].status == InUse) {
            vg_assert(sec->ttC[ei].n_tte2ec >= 1);
            vg_assert(sec->ttC[ei].n_tte2ec <= 3);
            n_dump_osize += TTEntryH__osize(&sec->ttH[ei]);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VexGuestExtents vge_tmp;
               TTEntryH__to_VexGuestExtents( &vge_tmp, &sec->ttH[ei] );
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->ttC[ei].entry, vge_tmp );
            }
            unchain_in_preparation_for_deletion(arch_host,
                                                endness_host, sno, ei);
         } else {
            vg_assert(sec->ttC[ei].n_tte2ec == 0);
         }
         sec->ttH[ei].status   = Empty;
         sec->ttC[ei].n_tte2ec = 0;
         add_to_empty_tt_list(sno, ei);
      }
      for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++)
         sec->htt[hi] = HTT_EMPTY;

      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n",
                                      sno);

      /* Free up the eclass structures. */
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         if (sec->ec2tte_size[e] == 0) {
            vg_assert(sec->ec2tte_used[e] == 0);
            vg_assert(sec->ec2tte[e] == NULL);
         } else {
            vg_assert(sec->ec2tte[e] != NULL);
            ttaux_free(sec->ec2tte[e]);
            sec->ec2tte[e] = NULL;
            sec->ec2tte_size[e] = 0;
            sec->ec2tte_used[e] = 0;
         }
      }

      /* Empty out the host extents array. */
      vg_assert(sec->host_extents != NULL);
      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      SECno ix;
      for (ix = 0; ix < n_sectors; ix++) {
         if (sector_search_order[ix] == sno)
            break;
      }
      vg_assert(ix >= 0 && ix < n_sectors);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}
1726 /* Add a translation of vge to TT/TC. The translation is temporarily
1727 in code[0 .. code_len-1].
1729 pre: youngest_sector points to a valid (although possibly full)
1730 sector.
1732 void VG_(add_to_transtab)( const VexGuestExtents* vge,
1733 Addr entry,
1734 Addr code,
1735 UInt code_len,
1736 Bool is_self_checking,
1737 Int offs_profInc,
1738 UInt n_guest_instrs )
1740 Int tcAvailQ, reqdQ, y;
1741 ULong *tcptr, *tcptr2;
1742 UChar* srcP;
1743 UChar* dstP;
1745 vg_assert(init_done);
1746 vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
1748 /* 60000: should agree with N_TMPBUF in m_translate.c. */
1749 vg_assert(code_len > 0 && code_len < 60000);
1751 /* Generally stay sane */
1752 vg_assert(n_guest_instrs < 200); /* it can be zero, tho */
1754 if (DEBUG_TRANSTAB)
1755 VG_(printf)("add_to_transtab(entry = 0x%lx, len = %u) ...\n",
1756 entry, code_len);
1758 n_in_count++;
1759 n_in_tsize += code_len;
1760 n_in_osize += vge_osize(vge);
1761 if (is_self_checking)
1762 n_in_sc_count++;
1764 y = youngest_sector;
1765 vg_assert(isValidSector(y));
1767 if (sectors[y].tc == NULL)
1768 initialiseSector(y);
1770 /* Try putting the translation in this sector. */
1771 reqdQ = (code_len + 7) >> 3;
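/* E.g. code_len == 100 bytes rounds up to reqdQ == (100+7)>>3 == 13
   quadwords, i.e. 104 bytes of tc space. */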
1773 /* Will it fit in tc? */
1774 tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
1775 - ((ULong*)(sectors[y].tc_next));
1776 vg_assert(tcAvailQ >= 0);
1777 vg_assert(tcAvailQ <= tc_sector_szQ);
1779 if (tcAvailQ < reqdQ
1780 || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR) {
1781 /* No. So move on to the next sector. Either it's never been
1782 used before, in which case it will get its tt/tc allocated
1783 now, or it has been used before, in which case it is set to be
1784 empty, hence throwing out the oldest sector. */
1785 vg_assert(tc_sector_szQ > 0);
1786 Int tt_loading_pct = (100 * sectors[y].tt_n_inuse)
1787 / N_HTTES_PER_SECTOR;
1788 Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ))
1789 / tc_sector_szQ;
1790 if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) {
1791 VG_(dmsg)("transtab: "
1792 "declare sector %d full "
1793 "(TT loading %2d%%, TC loading %2d%%, avg tce size %d)\n",
1794 y, tt_loading_pct, tc_loading_pct,
1795 8 * (tc_sector_szQ - tcAvailQ)/sectors[y].tt_n_inuse);
1796 }
1797 youngest_sector++;
1798 if (youngest_sector >= n_sectors)
1799 youngest_sector = 0;
1800 y = youngest_sector;
1801 initialiseSector(y);
1802 }
1804 /* Be sure ... */
1805 tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
1806 - ((ULong*)(sectors[y].tc_next));
1807 vg_assert(tcAvailQ >= 0);
1808 vg_assert(tcAvailQ <= tc_sector_szQ);
1809 vg_assert(tcAvailQ >= reqdQ);
1810 vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR);
1811 vg_assert(sectors[y].tt_n_inuse >= 0);
1813 /* Copy into tc. */
1814 tcptr = sectors[y].tc_next;
1815 vg_assert(tcptr >= &sectors[y].tc[0]);
1816 vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);
1818 dstP = (UChar*)tcptr;
1819 srcP = (UChar*)code;
1820 VG_(memcpy)(dstP, srcP, code_len);
1821 sectors[y].tc_next += reqdQ;
1822 sectors[y].tt_n_inuse++;
1824 /* more paranoia */
1825 tcptr2 = sectors[y].tc_next;
1826 vg_assert(tcptr2 >= &sectors[y].tc[0]);
1827 vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);
1829 /* Find an empty tt slot, and use it. There must be such a slot
1830 since tt is never allowed to get completely full. */
1831 TTEno tteix = get_empty_tt_slot(y);
1832 TTEntryC__init(&sectors[y].ttC[tteix]);
1833 TTEntryH__init(&sectors[y].ttH[tteix]);
1834 sectors[y].ttC[tteix].tcptr = tcptr;
1835 sectors[y].ttC[tteix].usage.prof.count = 0;
1837 sectors[y].ttC[tteix].usage.prof.weight
1838 = False
1839 ? // Count guest instrs (assumes all side exits are untaken)
1840 (n_guest_instrs == 0 ? 1 : n_guest_instrs)
1841 : // Counts some (not very good) approximation to host instructions
1842 (code_len == 0 ? 1 : (code_len / 4));
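/* The literal False above is a compile-time switch: as written, the
   second arm is selected, so blocks are weighted by a rough host
   instruction count (code_len / 4); changing it to True would weight
   by guest instruction count instead. */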
1844 sectors[y].ttC[tteix].entry = entry;
1845 TTEntryH__from_VexGuestExtents( &sectors[y].ttH[tteix], vge );
1846 sectors[y].ttH[tteix].status = InUse;
1848 // Point an htt entry to the tt slot
1849 HTTno htti = HASH_TT(entry);
1850 vg_assert(htti >= 0 && htti < N_HTTES_PER_SECTOR);
1851 while (True) {
1852 if (sectors[y].htt[htti] == HTT_EMPTY
1853 || sectors[y].htt[htti] == HTT_DELETED)
1854 break;
1855 htti++;
1856 if (htti >= N_HTTES_PER_SECTOR)
1857 htti = 0;
1858 }
1859 sectors[y].htt[htti] = tteix;
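/* The loop above is plain linear probing with wraparound. It is
   guaranteed to terminate, because tt (and hence htt) is never
   allowed to become completely full, so a free or deleted htt slot
   always exists. */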
1861 /* Patch in the profile counter location, if necessary. */
1862 if (offs_profInc != -1) {
1863 vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
1864 VexArch arch_host = VexArch_INVALID;
1865 VexArchInfo archinfo_host;
1866 VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
1867 VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
1868 VexEndness endness_host = archinfo_host.endness;
1869 VexInvalRange vir
1870 = LibVEX_PatchProfInc( arch_host, endness_host,
1871 dstP + offs_profInc,
1872 &sectors[y].ttC[tteix].usage.prof.count );
1873 VG_(invalidate_icache)( (void*)vir.start, vir.len );
1874 }
1876 VG_(invalidate_icache)( dstP, code_len );
1878 /* Add this entry to the host_extents map, checking that we're
1879 adding in order. */
1880 { HostExtent hx;
1881 hx.start = (UChar*)tcptr;
1882 hx.len = code_len;
1883 hx.tteNo = tteix;
1884 vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
1885 XArray* hx_array = sectors[y].host_extents;
1886 vg_assert(hx_array);
1887 Word n = VG_(sizeXA)(hx_array);
1888 if (n > 0) {
1889 HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
1890 vg_assert(hx_prev->start + hx_prev->len <= hx.start);
1891 }
1892 VG_(addToXA)(hx_array, &hx);
1893 if (DEBUG_TRANSTAB)
1894 VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n",
1895 hx.start, hx.len, y, tteix);
1896 }
1898 /* Update the fast-cache. */
1899 setFastCacheEntry( entry, tcptr );
1901 /* Note the eclass numbers for this translation. */
1902 upd_eclasses_after_add( &sectors[y], tteix );
1903 }
1906 /* Search for the translation of the given guest address. If
1907 requested, a successful search can also cause the fast-caches to be
1908 updated.
1909 */
1910 Bool VG_(search_transtab) ( /*OUT*/Addr* res_hcode,
1911 /*OUT*/SECno* res_sNo,
1912 /*OUT*/TTEno* res_tteNo,
1913 Addr guest_addr,
1914 Bool upd_cache )
1915 {
1916 SECno i, sno;
1917 HTTno j, k, kstart;
1918 TTEno tti;
1920 vg_assert(init_done);
1921 /* Find the initial probe point just once. It will be the same in
1922 all sectors and avoids multiple expensive % operations. */
1923 n_full_lookups++;
1924 kstart = HASH_TT(guest_addr);
1925 vg_assert(kstart >= 0 && kstart < N_HTTES_PER_SECTOR);
1927 /* Search in all the sectors, using sector_search_order[] as a
1928 heuristic guide as to what order to visit the sectors. */
1929 for (i = 0; i < n_sectors; i++) {
1931 sno = sector_search_order[i];
1932 if (UNLIKELY(sno == INV_SNO))
1933 return False; /* run out of sectors to search */
1935 k = kstart;
1936 for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
1937 n_lookup_probes++;
1938 tti = sectors[sno].htt[k];
1939 if (tti < N_TTES_PER_SECTOR
1940 && sectors[sno].ttC[tti].entry == guest_addr) {
1941 /* found it */
1942 if (upd_cache)
1943 setFastCacheEntry(
1944 guest_addr, sectors[sno].ttC[tti].tcptr );
1945 if (res_hcode)
1946 *res_hcode = (Addr)sectors[sno].ttC[tti].tcptr;
1947 if (res_sNo)
1948 *res_sNo = sno;
1949 if (res_tteNo)
1950 *res_tteNo = tti;
1951 /* pull this one one step closer to the front. For large
1952 apps this more or less halves the number of required
1953 probes. */
1954 if (i > 0) {
1955 Int tmp = sector_search_order[i-1];
1956 sector_search_order[i-1] = sector_search_order[i];
1957 sector_search_order[i] = tmp;
1958 }
1959 return True;
1960 }
1961 // tti is HTT_EMPTY or HTT_DELETED or not the entry of guest_addr
1962 if (sectors[sno].htt[k] == HTT_EMPTY)
1963 break; /* not found in this sector */
1964 k++;
1965 if (k == N_HTTES_PER_SECTOR)
1966 k = 0;
1967 }
1969 /* If we fall off the end, all entries are InUse and not
1970 matching, or Deleted. In any case we did not find it in this
1971 sector. */
1972 }
1974 /* Not found in any sector. */
1975 return False;
1976 }
1979 /*-------------------------------------------------------------*/
1980 /*--- Delete translations. ---*/
1981 /*-------------------------------------------------------------*/
1983 /* forward */
1984 static void unredir_discard_translations( Addr, ULong );
1986 /* Stuff for deleting translations which intersect with a given
1987 address range. Unfortunately, to make this run at a reasonable
1988 speed, it is complex. */
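/* overlap1 below treats its arguments as closed intervals
   [s, s+r-1]: the two ranges overlap iff neither ends before the
   other starts. E.g. s1=0x1000,r1=0x10 and s2=0x100F,r2=1 overlap,
   since e1 == s2 == 0x100F. */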
1990 static inline
1991 Bool overlap1 ( Addr s1, ULong r1, Addr s2, ULong r2 )
1992 {
1993 Addr e1 = s1 + r1 - 1;
1994 Addr e2 = s2 + r2 - 1;
1995 if (e1 < s2 || e2 < s1)
1996 return False;
1997 return True;
1998 }
2000 static inline
2001 Bool overlaps ( Addr start, ULong range, const TTEntryH* tteH )
2002 {
2003 if (overlap1(start, range, tteH->vge_base[0], tteH->vge_len[0]))
2004 return True;
2005 if (tteH->vge_n_used < 2)
2006 return False;
2007 if (overlap1(start, range, tteH->vge_base[1], tteH->vge_len[1]))
2008 return True;
2009 if (tteH->vge_n_used < 3)
2010 return False;
2011 if (overlap1(start, range, tteH->vge_base[2], tteH->vge_len[2]))
2012 return True;
2013 return False;
2014 }
2017 /* Delete a tt entry, and update all the eclass data accordingly. */
2019 static void delete_tte ( /*OUT*/Addr* ga_deleted,
2020 /*MOD*/Sector* sec, SECno secNo, TTEno tteno,
2021 VexArch arch_host, VexEndness endness_host )
2022 {
2023 Int i, ec_idx;
2024 EClassNo ec_num;
2026 /* sec and secNo are mutually redundant; cross-check. */
2027 vg_assert(sec == &sectors[secNo]);
2029 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
2030 TTEntryC* tteC = &sec->ttC[tteno];
2031 TTEntryH* tteH = &sec->ttH[tteno];
2032 vg_assert(tteH->status == InUse);
2033 vg_assert(tteC->n_tte2ec >= 1 && tteC->n_tte2ec <= 3);
2035 vg_assert(tteH->vge_n_used >= 1 && tteH->vge_n_used <= 3);
2036 vg_assert(tteH->vge_base[0] != TRANSTAB_BOGUS_GUEST_ADDR);
2037 *ga_deleted = tteH->vge_base[0];
2039 /* Unchain .. */
2040 unchain_in_preparation_for_deletion(arch_host, endness_host, secNo, tteno);
2042 /* Deal with the ec-to-tte links first. */
2043 for (i = 0; i < tteC->n_tte2ec; i++) {
2044 ec_num = tteC->tte2ec_ec[i];
2045 ec_idx = tteC->tte2ec_ix[i];
2046 vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
2047 vg_assert(ec_idx >= 0);
2048 vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
2049 /* Assert that the two links point at each other. */
2050 vg_assert(sec->ec2tte[ec_num][ec_idx] == tteno);
2051 /* "delete" the pointer back to here. */
2052 sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
2053 }
2055 /* Now fix up this TTEntry. */
2056 /* Mark the entry as deleted in htt.
2057 Note: we could avoid the below hash table search by
2058 adding a reference from tte to its hash position in tt. */
2059 HTTno j;
2060 HTTno k = HASH_TT(tteC->entry);
2061 vg_assert(k >= 0 && k < N_HTTES_PER_SECTOR);
2062 for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
2063 if (sec->htt[k] == tteno)
2064 break;
2065 k++;
2066 if (k == N_HTTES_PER_SECTOR)
2067 k = 0;
2068 }
2069 vg_assert(j < N_HTTES_PER_SECTOR);
2070 sec->htt[k] = HTT_DELETED;
2071 tteH->status = Deleted;
2072 tteC->n_tte2ec = 0;
2073 add_to_empty_tt_list(secNo, tteno);
2075 /* Stats .. */
2076 sec->tt_n_inuse--;
2077 n_disc_count++;
2078 n_disc_osize += TTEntryH__osize(tteH);
2080 /* Tell the tool too. */
2081 if (VG_(needs).superblock_discards) {
2082 VexGuestExtents vge_tmp;
2083 TTEntryH__to_VexGuestExtents( &vge_tmp, tteH );
2084 VG_TDICT_CALL( tool_discard_superblock_info, tteC->entry, vge_tmp );
2085 }
2086 }
2089 /* Delete translations from sec which intersect specified range, but
2090 only consider translations in the specified eclass. */
2092 static
2093 SizeT delete_translations_in_sector_eclass ( /*OUT*/Addr* ga_deleted,
2094 /*MOD*/Sector* sec, SECno secNo,
2095 Addr guest_start, ULong range,
2096 EClassNo ec,
2097 VexArch arch_host,
2098 VexEndness endness_host )
2099 {
2100 Int i;
2101 TTEno tteno;
2102 SizeT numDeld = 0;
2104 vg_assert(ec >= 0 && ec < ECLASS_N);
2106 for (i = 0; i < sec->ec2tte_used[ec]; i++) {
2108 tteno = sec->ec2tte[ec][i];
2109 if (tteno == EC2TTE_DELETED) {
2110 /* already deleted */
2111 continue;
2112 }
2114 vg_assert(tteno < N_TTES_PER_SECTOR);
2116 TTEntryH* tteH = &sec->ttH[tteno];
2117 vg_assert(tteH->status == InUse);
2119 if (overlaps( guest_start, range, tteH )) {
2120 numDeld++;
2121 delete_tte( ga_deleted, sec, secNo, tteno, arch_host, endness_host );
2122 }
2123 }
2126 return numDeld;
2127 }
2130 /* Delete translations from sec which intersect specified range, the
2131 slow way, by inspecting all translations in sec. */
2133 static
2134 SizeT delete_translations_in_sector ( /*OUT*/Addr* ga_deleted,
2135 /*MOD*/Sector* sec, SECno secNo,
2136 Addr guest_start, ULong range,
2137 VexArch arch_host,
2138 VexEndness endness_host )
2139 {
2140 TTEno i;
2141 SizeT numDeld = 0;
2143 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2144 /* The entire and only purpose of splitting TTEntry into cold
2145 and hot parts (TTEntryC and TTEntryH) is to make this search
2146 loop run as fast as possible, by avoiding having to pull all
2147 of the cold data up the memory hierarchy. */
2148 if (UNLIKELY(sec->ttH[i].status == InUse
2149 && overlaps( guest_start, range, &sec->ttH[i] ))) {
2150 numDeld++;
2151 delete_tte( ga_deleted, sec, secNo, i, arch_host, endness_host );
2152 }
2153 }
2155 return numDeld;
2156 }
2159 void VG_(discard_translations) ( Addr guest_start, ULong range,
2160 const HChar* who )
2161 {
2162 Sector* sec;
2163 SECno sno;
2164 EClassNo ec;
2166 /* It is very commonly the case that a call here results in discarding of
2167 exactly one superblock. As an optimisation only, use ga_deleted and
2168 numDeleted to detect this situation and to record the guest addr involved.
2169 That is then used to avoid calling invalidateFastCache in this case.
2170 Instead the individual entry in the fast cache is removed. This can reduce
2171 the overall VG_(tt_fast) miss rate significantly in applications that do
2172 a lot of short code discards (basically jit generated code that is
2173 subsequently patched).
2175 ga_deleted is made to hold the guest address of the last superblock deleted
2176 (in this call to VG_(discard_translations)). If more than one superblock
2177 is deleted (or none), then we ignore what is written to ga_deleted. If
2178 exactly one superblock is deleted then ga_deleted holds exactly what we
2179 want and will be used.
2180 */
2181 Addr ga_deleted = TRANSTAB_BOGUS_GUEST_ADDR;
2182 SizeT numDeleted = 0;
2184 vg_assert(init_done);
2186 VG_(debugLog)(2, "transtab",
2187 "discard_translations(0x%lx, %llu) req by %s\n",
2188 guest_start, range, who );
2190 /* Pre-deletion sanity check */
2191 if (VG_(clo_sanity_level) >= 4) {
2192 Bool sane = sanity_check_all_sectors();
2193 vg_assert(sane);
2194 }
2196 if (range == 0)
2197 return;
2199 VexArch arch_host = VexArch_INVALID;
2200 VexArchInfo archinfo_host;
2201 VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
2202 VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
2203 VexEndness endness_host = archinfo_host.endness;
2205 /* There are two different ways to do this.
2207 If the range fits within a single address-range equivalence
2208 class, as will be the case for a cache line sized invalidation,
2209 then we only have to inspect the set of translations listed in
2210 that equivalence class, and also in the "sin-bin" equivalence
2211 class ECLASS_MISC.
2213 Otherwise, the invalidation is of a larger range and probably
2214 results from munmap. In this case it's (probably!) faster just
2215 to inspect all translations, dump those we don't want, and
2216 regenerate the equivalence class information (since modifying it
2217 in-situ is even more expensive).
2218 */
2220 /* First off, figure out if the range falls within a single class,
2221 and if so which one. */
2223 ec = ECLASS_MISC;
2224 if (range <= (1ULL << ECLASS_SHIFT))
2225 ec = range_to_eclass( guest_start, (UInt)range );
2227 /* if ec is ECLASS_MISC then we aren't looking at just a single
2228 class, so use the slow scheme. Else use the fast scheme,
2229 examining 'ec' and ECLASS_MISC. */
2231 if (ec != ECLASS_MISC) {
2233 VG_(debugLog)(2, "transtab",
2234 " FAST, ec = %d\n", ec);
2236 /* Fast scheme */
2237 vg_assert(ec >= 0 && ec < ECLASS_MISC);
2239 for (sno = 0; sno < n_sectors; sno++) {
2240 sec = &sectors[sno];
2241 if (sec->tc == NULL)
2242 continue;
2243 numDeleted += delete_translations_in_sector_eclass(
2244 &ga_deleted, sec, sno, guest_start, range,
2245 ec, arch_host, endness_host
2246 );
2247 numDeleted += delete_translations_in_sector_eclass(
2248 &ga_deleted, sec, sno, guest_start, range,
2249 ECLASS_MISC, arch_host, endness_host
2250 );
2251 }
2253 } else {
2255 /* slow scheme */
2257 VG_(debugLog)(2, "transtab",
2258 " SLOW, ec = %d\n", ec);
2260 for (sno = 0; sno < n_sectors; sno++) {
2261 sec = &sectors[sno];
2262 if (sec->tc == NULL)
2263 continue;
2264 numDeleted += delete_translations_in_sector(
2265 &ga_deleted, sec, sno, guest_start, range,
2266 arch_host, endness_host
2267 );
2268 }
2269 }
2272 if (numDeleted == 0) {
2273 // "ga_deleted was never set"
2274 vg_assert(ga_deleted == TRANSTAB_BOGUS_GUEST_ADDR);
2275 } else
2276 if (numDeleted == 1) {
2277 // "ga_deleted was set to something valid"
2278 vg_assert(ga_deleted != TRANSTAB_BOGUS_GUEST_ADDR);
2279 // Just invalidate the individual VG_(tt_fast) cache entry \o/
2280 invalidateFastCacheEntry(ga_deleted);
2281 Addr fake_host = 0;
2282 vg_assert(! VG_(lookupInFastCache)(&fake_host, ga_deleted));
2283 } else {
2284 // "ga_deleted was set to something valid"
2285 vg_assert(ga_deleted != TRANSTAB_BOGUS_GUEST_ADDR);
2286 // Nuke the entire VG_(tt_fast) cache. Sigh.
2287 invalidateFastCache();
2288 }
2290 /* don't forget the no-redir cache */
2291 unredir_discard_translations( guest_start, range );
2293 /* Post-deletion sanity check */
2294 if (VG_(clo_sanity_level) >= 4) {
2295 TTEno i;
2296 Bool sane = sanity_check_all_sectors();
2297 vg_assert(sane);
2298 /* But now, also check the requested address range isn't
2299 present anywhere. */
2300 for (sno = 0; sno < n_sectors; sno++) {
2301 sec = &sectors[sno];
2302 if (sec->tc == NULL)
2303 continue;
2304 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2305 TTEntryH* tteH = &sec->ttH[i];
2306 if (tteH->status != InUse)
2307 continue;
2308 vg_assert(!overlaps( guest_start, range, tteH ));
2309 }
2310 }
2311 }
2312 }
2314 /* Whether or not tools may discard translations. */
2315 Bool VG_(ok_to_discard_translations) = False;
2317 /* This function is exported to tools which can use it to discard
2318 translations, provided it is safe to do so. */
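/* A usage sketch (hypothetical tool code; the "mytool.jit-patch" tag
   is purely illustrative): a tool that observes the guest patching
   previously-executed code at ga .. ga+len-1 might request

      VG_(discard_translations_safely)( ga, len, "mytool.jit-patch" );

   which asserts VG_(ok_to_discard_translations) and then forwards to
   VG_(discard_translations). */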
2319 void VG_(discard_translations_safely) ( Addr start, SizeT len,
2320 const HChar* who )
2321 {
2322 vg_assert(VG_(ok_to_discard_translations));
2323 VG_(discard_translations)(start, len, who);
2324 }
2326 /*------------------------------------------------------------*/
2327 /*--- AUXILIARY: the unredirected TT/TC ---*/
2328 /*------------------------------------------------------------*/
2330 /* A very simple translation cache which holds a small number of
2331 unredirected translations. This is completely independent of the
2332 main tt/tc structures. When unredir_tc or unredir_tt becomes full,
2333 both structures are simply dumped and we start over.
2335 Since these translations are unredirected, the search key is (by
2336 definition) the first address entry in the .vge field. */
2338 /* Sized to hold 500 translations of average size 1000 bytes. */
2340 #define UNREDIR_SZB 1000
2342 #define N_UNREDIR_TT 500
2343 #define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
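/* That is, 500 * 1000 / 8 == 62500 ULongs of unredirected code
   storage in total. */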
2345 typedef
2346 struct {
2347 VexGuestExtents vge;
2348 Addr hcode;
2349 Bool inUse;
2350 }
2351 UTCEntry;
2353 /* We just allocate forwards in _tc, never deleting. */
2354 static ULong *unredir_tc;
2355 static Int unredir_tc_used = N_UNREDIR_TCQ;
2357 /* Slots in _tt can come into use and out again (.inUse).
2358 Nevertheless _tt_highwater is maintained so that invalidations
2359 don't have to scan all the slots when only a few are in use.
2360 _tt_highwater holds the index of the highest ever allocated
2361 slot. */
2362 static UTCEntry unredir_tt[N_UNREDIR_TT];
2363 static Int unredir_tt_highwater;
2366 static void init_unredir_tt_tc ( void )
2367 {
2368 Int i;
2369 if (unredir_tc == NULL) {
2370 SysRes sres = VG_(am_mmap_anon_float_valgrind)
2371 ( N_UNREDIR_TT * UNREDIR_SZB );
2372 if (sr_isError(sres)) {
2373 VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
2374 N_UNREDIR_TT * UNREDIR_SZB);
2375 /*NOTREACHED*/
2376 }
2377 unredir_tc = (ULong *)(Addr)sr_Res(sres);
2378 }
2379 unredir_tc_used = 0;
2380 for (i = 0; i < N_UNREDIR_TT; i++)
2381 unredir_tt[i].inUse = False;
2382 unredir_tt_highwater = -1;
2383 }
2385 /* Do a sanity check; return False on failure. */
2386 static Bool sanity_check_redir_tt_tc ( void )
2387 {
2388 Int i;
2389 if (unredir_tt_highwater < -1) return False;
2390 if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
2392 for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
2393 if (unredir_tt[i].inUse)
2394 return False;
2396 if (unredir_tc_used < 0) return False;
2397 if (unredir_tc_used > N_UNREDIR_TCQ) return False;
2399 return True;
2400 }
2403 /* Add an UNREDIRECTED translation of vge to TT/TC. The translation
2404 is temporarily in code[0 .. code_len-1].
2405 */
2406 void VG_(add_to_unredir_transtab)( const VexGuestExtents* vge,
2407 Addr entry,
2408 Addr code,
2409 UInt code_len )
2410 {
2411 Int i, j, code_szQ;
2412 HChar *srcP, *dstP;
2414 vg_assert(sanity_check_redir_tt_tc());
2416 /* This is the whole point: it's not redirected! */
2417 vg_assert(entry == vge->base[0]);
2419 /* How many unredir_tc quadwords (8-byte units) are needed */
2420 code_szQ = (code_len + 7) / 8;
2422 /* Look for an empty unredir_tt slot */
2423 for (i = 0; i < N_UNREDIR_TT; i++)
2424 if (!unredir_tt[i].inUse)
2425 break;
2427 if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
2428 /* It's full; dump everything we currently have */
2429 init_unredir_tt_tc();
2430 i = 0;
2431 }
2433 vg_assert(unredir_tc_used >= 0);
2434 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2435 vg_assert(code_szQ > 0);
2436 vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
2437 vg_assert(i >= 0 && i < N_UNREDIR_TT);
2438 vg_assert(unredir_tt[i].inUse == False);
2440 if (i > unredir_tt_highwater)
2441 unredir_tt_highwater = i;
2443 dstP = (HChar*)&unredir_tc[unredir_tc_used];
2444 srcP = (HChar*)code;
2445 for (j = 0; j < code_len; j++)
2446 dstP[j] = srcP[j];
2448 VG_(invalidate_icache)( dstP, code_len );
2450 unredir_tt[i].inUse = True;
2451 unredir_tt[i].vge = *vge;
2452 unredir_tt[i].hcode = (Addr)dstP;
2454 unredir_tc_used += code_szQ;
2455 vg_assert(unredir_tc_used >= 0);
2456 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2458 vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
2459 }
2461 Bool VG_(search_unredir_transtab) ( /*OUT*/Addr* result,
2462 Addr guest_addr )
2463 {
2464 Int i;
2465 for (i = 0; i < N_UNREDIR_TT; i++) {
2466 if (!unredir_tt[i].inUse)
2467 continue;
2468 if (unredir_tt[i].vge.base[0] == guest_addr) {
2469 *result = unredir_tt[i].hcode;
2470 return True;
2471 }
2472 }
2473 return False;
2474 }
2476 static void unredir_discard_translations( Addr guest_start, ULong range )
2477 {
2478 Int i;
2480 vg_assert(sanity_check_redir_tt_tc());
2482 for (i = 0; i <= unredir_tt_highwater; i++) {
2483 if (unredir_tt[i].inUse) {
2484 /* Fake up a TTEntryH just so we can pass it to overlaps()
2485 without having to make a new version of overlaps() just for
2486 this rare case. */
2487 TTEntryH tmp_tteH;
2488 TTEntryH__from_VexGuestExtents( &tmp_tteH, &unredir_tt[i].vge );
2489 tmp_tteH.status = Empty; /* Completely irrelevant; pure paranoia. */
2490 if (overlaps( guest_start, range, &tmp_tteH )) {
2491 unredir_tt[i].inUse = False;
2492 }
2493 }
2494 }
2495 }
2498 /*------------------------------------------------------------*/
2499 /*--- Initialisation. ---*/
2500 /*------------------------------------------------------------*/
2502 void VG_(init_tt_tc) ( void )
2503 {
2504 Int i, avg_codeszQ;
2506 vg_assert(!init_done);
2507 init_done = True;
2509 /* Otherwise lots of things go wrong... */
2510 vg_assert(sizeof(ULong) == 8);
2511 vg_assert(sizeof(TTEno) == sizeof(HTTno));
2512 vg_assert(sizeof(TTEno) == 2);
2513 vg_assert(N_TTES_PER_SECTOR <= N_HTTES_PER_SECTOR);
2514 vg_assert(N_HTTES_PER_SECTOR < INV_TTE);
2515 vg_assert(N_HTTES_PER_SECTOR < EC2TTE_DELETED);
2516 vg_assert(N_HTTES_PER_SECTOR < HTT_EMPTY);
2518 /* check fast cache entries really are 8 words long */
2519 vg_assert(sizeof(Addr) == sizeof(void*));
2520 vg_assert(sizeof(FastCacheSet) == 8 * sizeof(Addr));
2521 /* check fast cache entries are packed back-to-back with no spaces */
2522 vg_assert(sizeof( VG_(tt_fast) )
2523 == VG_TT_FAST_SETS * sizeof(FastCacheSet));
2524 /* check fast cache entries have the layout that the handwritten assembly
2525 fragments assume. */
2526 vg_assert(sizeof(FastCacheSet) == (1 << VG_FAST_CACHE_SET_BITS));
2527 vg_assert(offsetof(FastCacheSet,guest0) == FCS_g0);
2528 vg_assert(offsetof(FastCacheSet,host0) == FCS_h0);
2529 vg_assert(offsetof(FastCacheSet,guest1) == FCS_g1);
2530 vg_assert(offsetof(FastCacheSet,host1) == FCS_h1);
2531 vg_assert(offsetof(FastCacheSet,guest2) == FCS_g2);
2532 vg_assert(offsetof(FastCacheSet,host2) == FCS_h2);
2533 vg_assert(offsetof(FastCacheSet,guest3) == FCS_g3);
2534 vg_assert(offsetof(FastCacheSet,host3) == FCS_h3);
2535 vg_assert(offsetof(FastCacheSet,guest0) == 0 * sizeof(Addr));
2536 vg_assert(offsetof(FastCacheSet,host0) == 1 * sizeof(Addr));
2537 vg_assert(offsetof(FastCacheSet,guest1) == 2 * sizeof(Addr));
2538 vg_assert(offsetof(FastCacheSet,host1) == 3 * sizeof(Addr));
2539 vg_assert(offsetof(FastCacheSet,guest2) == 4 * sizeof(Addr));
2540 vg_assert(offsetof(FastCacheSet,host2) == 5 * sizeof(Addr));
2541 vg_assert(offsetof(FastCacheSet,guest3) == 6 * sizeof(Addr));
2542 vg_assert(offsetof(FastCacheSet,host3) == 7 * sizeof(Addr));
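/* Taken together, these checks pin down the layout: a FastCacheSet
   is four (guest, host) address pairs packed back-to-back,
   { g0,h0, g1,h1, g2,h2, g3,h3 }, i.e. 8 pointers per set. */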
2544 /* check fast cache is aligned as we requested. Not fatal if it
2545 isn't, but we might as well make sure. */
2546 vg_assert(VG_IS_64_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));
2548 /* The TTEntryH size is critical for keeping the LLC miss rate down
2549 when doing a lot of discarding. Hence check it here. We also
2550 have a lot of TTEntryCs so let's check that too. */
2551 if (sizeof(HWord) == 8) {
2552 vg_assert(sizeof(TTEntryH) <= 32);
2553 vg_assert(sizeof(TTEntryC) <= 112);
2554 }
2555 else if (sizeof(HWord) == 4) {
2556 vg_assert(sizeof(TTEntryH) <= 20);
2557 # if defined(VGP_ppc32_linux) || defined(VGP_mips32_linux) \
2558 || (defined(VGP_mips64_linux) && defined(VGABI_N32)) \
2559 || defined(VGP_nanomips_linux) || defined(VGP_arm_linux)
2560 /* On PPC32, MIPS32, ARM32 platforms, alignof(ULong) == 8, so the
2561 structure is larger than on other 32 bit targets. */
2562 vg_assert(sizeof(TTEntryC) <= 96);
2563 # else
2564 vg_assert(sizeof(TTEntryC) <= 88);
2565 # endif
2566 }
2567 else {
2568 vg_assert(0);
2569 }
2571 /* All the hassle about converting between TTEntryH and
2572 VexGuestExtents was so as to ensure the following. */
2573 vg_assert(sizeof(TTEntryH) == sizeof(VexGuestExtents));
2575 if (VG_(clo_verbosity) > 2)
2576 VG_(message)(Vg_DebugMsg,
2577 "TT/TC: VG_(init_tt_tc) "
2578 "(startup of code management)\n");
2580 /* Figure out how big each tc area should be. */
2581 if (VG_(clo_avg_transtab_entry_size) == 0)
2582 avg_codeszQ = (VG_(details).avg_translation_sizeB + 7) / 8;
2583 else
2584 avg_codeszQ = (VG_(clo_avg_transtab_entry_size) + 7) / 8;
2585 tc_sector_szQ = N_TTES_PER_SECTOR * (1 + avg_codeszQ);
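/* Worked example: --avg-transtab-entry-size=256 gives
   avg_codeszQ == (256+7)/8 == 32, so each sector's tc gets
   N_TTES_PER_SECTOR * 33 quadwords, i.e. (1+32)*8 == 264 bytes of tc
   budgeted per expected translation. */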
2587 /* Ensure the calculated value is not way crazy. */
2588 vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR);
2589 vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR);
2591 n_sectors = VG_(clo_num_transtab_sectors);
2592 vg_assert(n_sectors >= MIN_N_SECTORS);
2593 vg_assert(n_sectors <= MAX_N_SECTORS);
2595 /* Initialise the sectors, even the ones we aren't going to use.
2596 Set all fields to zero. */
2597 youngest_sector = 0;
2598 for (i = 0; i < MAX_N_SECTORS; i++)
2599 VG_(memset)(&sectors[i], 0, sizeof(sectors[i]));
2601 /* Initialise the sector_search_order hint table, including the
2602 entries we aren't going to use. */
2603 for (i = 0; i < MAX_N_SECTORS; i++)
2604 sector_search_order[i] = INV_SNO;
2606 /* Initialise the fast cache. */
2607 invalidateFastCache();
2609 /* and the unredir tt/tc */
2610 init_unredir_tt_tc();
2612 if (VG_(clo_verbosity) > 2 || VG_(clo_stats)
2613 || VG_(debugLog_getLevel) () >= 2) {
2614 VG_(message)(Vg_DebugMsg,
2615 "TT/TC: cache: %s--avg-transtab-entry-size=%u, "
2616 "%stool provided default %u\n",
2617 VG_(clo_avg_transtab_entry_size) == 0 ? "ignoring " : "using ",
2618 VG_(clo_avg_transtab_entry_size),
2619 VG_(clo_avg_transtab_entry_size) == 0 ? "using " : "ignoring ",
2620 VG_(details).avg_translation_sizeB);
2621 VG_(message)(Vg_DebugMsg,
2622 "TT/TC: cache: %d sectors of %'d bytes each = %'d total TC\n",
2623 n_sectors, 8 * tc_sector_szQ,
2624 n_sectors * 8 * tc_sector_szQ );
2625 VG_(message)(Vg_DebugMsg,
2626 "TT/TC: table: %'d tables[%d] of C %'d + H %'d bytes each "
2627 "= %'d total TT\n",
2628 n_sectors, N_TTES_PER_SECTOR,
2629 (int)(N_TTES_PER_SECTOR * sizeof(TTEntryC)),
2630 (int)(N_TTES_PER_SECTOR * sizeof(TTEntryH)),
2631 (int)(n_sectors * N_TTES_PER_SECTOR
2632 * (sizeof(TTEntryC) + sizeof(TTEntryH))));
2633 VG_(message)(Vg_DebugMsg,
2634 "TT/TC: table: %d tt entries each = %'d total tt entries\n",
2635 N_TTES_PER_SECTOR, n_sectors * N_TTES_PER_SECTOR);
2636 VG_(message)(Vg_DebugMsg,
2637 "TT/TC: table: %d htt[%d] of %'d bytes each = %'d total HTT"
2638 " (htt[%d] %d%% max occup)\n",
2639 n_sectors, N_HTTES_PER_SECTOR,
2640 (int)(N_HTTES_PER_SECTOR * sizeof(TTEno)),
2641 (int)(n_sectors * N_HTTES_PER_SECTOR * sizeof(TTEno)),
2642 N_HTTES_PER_SECTOR, SECTOR_TT_LIMIT_PERCENT);
2643 }
2645 if (0) {
2646 VG_(printf)("XXXX sizeof(VexGuestExtents) = %d\n",
2647 (Int)sizeof(VexGuestExtents));
2648 VG_(printf)("XXXX sizeof(InEdge) = %d\n", (Int)sizeof(InEdge));
2649 VG_(printf)("XXXX sizeof(OutEdge) = %d\n", (Int)sizeof(OutEdge));
2650 VG_(printf)("XXXX sizeof(InEdgeArr) = %d\n", (Int)sizeof(InEdgeArr));
2651 VG_(printf)("XXXX sizeof(OutEdgeArr) = %d\n", (Int)sizeof(OutEdgeArr));
2652 VG_(printf)("XXXX sizeof(TTEntryC) = %d\n", (Int)sizeof(TTEntryC));
2653 VG_(printf)("XXXX sizeof(TTEntryH) = %d\n", (Int)sizeof(TTEntryH));
2658 /*------------------------------------------------------------*/
2659 /*--- Printing out statistics. ---*/
2660 /*------------------------------------------------------------*/
2662 static Double safe_idiv( ULong a, ULong b )
2663 {
2664 return (b == 0 ? 0 : (Double)a / (Double)b);
2665 }
2667 UInt VG_(get_bbs_translated) ( void )
2668 {
2669 return n_in_count;
2670 }
2672 UInt VG_(get_bbs_discarded_or_dumped) ( void )
2673 {
2674 return n_disc_count + n_dump_count;
2675 }
2677 void VG_(print_tt_tc_stats) ( void )
2678 {
2679 VG_(message)(Vg_DebugMsg,
2680 " tt/tc: %'llu tt lookups requiring %'llu probes\n",
2681 n_full_lookups, n_lookup_probes );
2682 VG_(message)(Vg_DebugMsg,
2683 " tt/tc: %'llu fast-cache updates, %'llu flushes\n",
2684 n_fast_updates, n_fast_flushes );
2686 VG_(message)(Vg_DebugMsg,
2687 " transtab: new %'llu "
2688 "(%'llu -> %'llu; ratio %3.1f) [%'llu scs] "
2689 "avg tce size %llu\n",
2690 n_in_count, n_in_osize, n_in_tsize,
2691 safe_idiv(n_in_tsize, n_in_osize),
2692 n_in_sc_count,
2693 n_in_tsize / (n_in_count ? n_in_count : 1));
2694 VG_(message)(Vg_DebugMsg,
2695 " transtab: dumped %'llu (%'llu -> ?" "?) "
2696 "(sectors recycled %'llu)\n",
2697 n_dump_count, n_dump_osize, n_sectors_recycled );
2698 VG_(message)(Vg_DebugMsg,
2699 " transtab: discarded %'llu (%'llu -> ?" "?)\n",
2700 n_disc_count, n_disc_osize );
2702 if (DEBUG_TRANSTAB) {
2703 VG_(printf)("\n");
2704 for (EClassNo e = 0; e < ECLASS_N; e++) {
2705 VG_(printf)(" %4d", sectors[0].ec2tte_used[e]);
2706 if (e % 16 == 15)
2707 VG_(printf)("\n");
2708 }
2709 VG_(printf)("\n\n");
2710 }
2711 }
2713 /*------------------------------------------------------------*/
2714 /*--- Printing out of profiling results. ---*/
2715 /*------------------------------------------------------------*/
2717 static ULong score ( const TTEntryC* tteC )
2718 {
2719 return ((ULong)tteC->usage.prof.weight) * ((ULong)tteC->usage.prof.count);
2720 }
2722 ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops )
2723 {
2724 SECno sno;
2725 Int r, s;
2726 ULong score_total;
2727 TTEno i;
2729 /* First, compute the total weighted count, and find the top N
2730 ttes. tops contains pointers to the most-used n_tops blocks, in
2731 descending order (viz, tops[0] is the highest scorer). */
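/* The scan below is in effect an insertion sort into the fixed-size
   tops[] array: for each in-use entry, find its rank r among the
   current top scorers, then shift lower scorers down one slot to make
   room. Worst case this costs on the order of
   n_sectors * N_TTES_PER_SECTOR * n_tops comparisons. */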
2732 for (s = 0; s < n_tops; s++) {
2733 tops[s].addr = 0;
2734 tops[s].score = 0;
2735 }
2737 score_total = 0;
2739 for (sno = 0; sno < n_sectors; sno++) {
2740 if (sectors[sno].tc == NULL)
2741 continue;
2742 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2743 if (sectors[sno].ttH[i].status != InUse)
2744 continue;
2745 score_total += score(&sectors[sno].ttC[i]);
2746 /* Find the rank for sectors[sno].tt{C,H}[i]. */
2747 r = n_tops-1;
2748 while (True) {
2749 if (r == -1)
2750 break;
2751 if (tops[r].addr == 0) {
2752 r--;
2753 continue;
2755 if ( score(&sectors[sno].ttC[i]) > tops[r].score ) {
2756 r--;
2757 continue;
2759 break;
2760 }
2761 r++;
2762 vg_assert(r >= 0 && r <= n_tops);
2763 /* This bb should be placed at r, and bbs above it shifted
2764 upwards one slot. */
2765 if (r < n_tops) {
2766 for (s = n_tops-1; s > r; s--)
2767 tops[s] = tops[s-1];
2768 tops[r].addr = sectors[sno].ttC[i].entry;
2769 tops[r].score = score( &sectors[sno].ttC[i] );
2770 }
2771 }
2772 }
2774 /* Now zero out all the counter fields, so that we can make
2775 multiple calls here and just get the values since the last call,
2776 each time, rather than values accumulated for the whole run. */
2777 for (sno = 0; sno < n_sectors; sno++) {
2778 if (sectors[sno].tc == NULL)
2779 continue;
2780 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2781 if (sectors[sno].ttH[i].status != InUse)
2782 continue;
2783 sectors[sno].ttC[i].usage.prof.count = 0;
2784 }
2785 }
2787 return score_total;
2788 }
2790 /*--------------------------------------------------------------------*/
2791 /*--- end ---*/
2792 /*--------------------------------------------------------------------*/