drd/tests/tsan_unittest: Avoid that this test reads from uninitialized memory
[valgrind.git] / coregrind / pub_core_transtab_asm.h
blob9e85774c7454ee652129f985d244403a7e311b75
2 /*--------------------------------------------------------------------*/
3 /*--- Asm-only TransTab stuff. pub_core_transtab_asm.h ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2000-2017 Julian Seward
11 jseward@acm.org
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
28 The GNU General Public License is contained in the file COPYING.
31 #ifndef __PUB_CORE_TRANSTAB_ASM_H
32 #define __PUB_CORE_TRANSTAB_ASM_H
34 /* Constants for the fast translation lookup cache. It is a 4 way associative
35 cache, with more-or-less LRU replacement. It contains 2^VG_TT_FAST_BITS
36 sets.
38 On all targets, the set number is computed from the least significant 2 *
39 VG_TT_FAST_BITS bits of the guest address. This is a bit unusual inasmuch as
40 it is more normal just to use a VG_TT_FAST_BITS-sized slice of the address
41 as the set number. Using twice as many bits (the two chunks are xor'd)
42 spreads entries out (reduces aliasing) and significantly reduces the overall
43 miss rate. The cost is two extra cycles on the fast lookup path, to perform
44 an extra shift and an xor.
46 For each set there are 4 ways: way0, way1, way2 and way3. way0 is intended
47 to be the MRU and way3 the LRU. Most lookups hit way0 and involve no
48 modification of the line. A hit at way1 causes way0 and way1 to be swapped.
49 A hit at way2 causes way1 and way2 to be swapped; that is, way2 is moved one
50 step closer to the front. But not all the way to the front. Similarly a
51 hit at way3 causes way2 and way3 to be swapped.
53 See VG_(lookupInFastCache) for a C implementation of this logic and
54 dispatch-*-*.S, label VG_(disp_cp_xindir), for the handcoded assembly
55 equivalents for each target. Note that VG_(lookupInFastCache) is used in C
56 land for some administrative lookups but isn't really performance critical.
57 The dispatch-*-*.S implementations are used to process all indirect branches
58 in the simulator and so *are* performance critical.
60 Updates to the cache are rare. These are performed by setFastCacheEntry.
61 New entries are put into way0 and all others are shifted down one slot, so
62 that the contents of way3 fall out of the cache.
64 On x86/amd64, the cache index is computed as
65 (address ^ (address >>u VG_TT_FAST_BITS))[VG_TT_FAST_BITS-1 : 0].
67 On ppc32/ppc64/mips32/mips64/arm64, the bottom two bits of instruction
68 addresses are zero, which means the above function causes only 1/4 of the
69 sets to ever be used. So instead the function is
70 (address ^ (address >>u VG_TT_FAST_BITS))[VG_TT_FAST_BITS-1+2 : 0+2].
72 On arm32, the minimum instruction size is 2, so we discard only the least
73 significant bit of the address, hence:
74 (address ^ (address >>u VG_TT_FAST_BITS))[VG_TT_FAST_BITS-1+1 : 0+1].
76 On s390x the rightmost bit of an instruction address is zero, so the arm32
77 scheme is used. */
// Log2 of the number of sets in the fast translation lookup cache.
79 #define VG_TT_FAST_BITS 13
// Number of sets in the cache: 2^VG_TT_FAST_BITS.
80 #define VG_TT_FAST_SETS (1 << VG_TT_FAST_BITS)
// VG_TT_FAST_SETS - 1, i.e. a mask with the low VG_TT_FAST_BITS bits set.
// NOTE(review): presumably used to truncate the hashed guest address to a
// set number -- confirm against the users of this macro.
81 #define VG_TT_FAST_MASK ((VG_TT_FAST_SETS) - 1)
// Per-host layout constants for one cache set (FastCacheSet).  They are
// spelled out as plain numbers, selected by host architecture, rather than
// computed with sizeof/offsetof.
83 // Log2(sizeof(FastCacheSet)). This is needed in the handwritten assembly.
85 #if defined(VGA_amd64) || defined(VGA_arm64) \
86 || defined(VGA_ppc64be) || defined(VGA_ppc64le) || defined(VGA_mips64) \
87 || defined(VGA_s390x)
88 // And all other 64-bit hosts
// The field offsets below advance in 8-byte steps and the last field sits at
// offset 56, which matches a set size of 64 bytes == 1 << 6.
89 # define VG_FAST_CACHE_SET_BITS 6
90 // These FCS_{g,h}{0,1,2,3} are the values of
91 // offsetof(FastCacheSet,{guest,host}{0,1,2,3}).
92 # define FCS_g0 0
93 # define FCS_h0 8
94 # define FCS_g1 16
95 # define FCS_h1 24
96 # define FCS_g2 32
97 # define FCS_h2 40
98 # define FCS_g3 48
99 # define FCS_h3 56
101 #elif defined(VGA_x86) || defined(VGA_arm) || defined(VGA_ppc32) \
102 || defined(VGA_mips32)
103 // And all other 32-bit hosts
// Same fields as in the 64-bit branch, but the offsets advance in 4-byte
// steps and the last field sits at offset 28, which matches a set size of
// 32 bytes == 1 << 5.
104 # define VG_FAST_CACHE_SET_BITS 5
// FCS_{g,h}{0,1,2,3}: offsetof(FastCacheSet,{guest,host}{0,1,2,3}) on
// 32-bit hosts.
105 # define FCS_g0 0
106 # define FCS_h0 4
107 # define FCS_g1 8
108 # define FCS_h1 12
109 # define FCS_g2 16
110 # define FCS_h2 20
111 # define FCS_g3 24
112 # define FCS_h3 28
114 #else
// Unrecognised host architecture: stop the build instead of guessing the
// FastCacheSet layout.
115 # error "VG_FAST_CACHE_SET_BITS not known"
116 #endif
118 #endif // __PUB_CORE_TRANSTAB_ASM_H
120 /*--------------------------------------------------------------------*/
121 /*--- end ---*/
122 /*--------------------------------------------------------------------*/