/*---------------------------------------------------------------*/
/*--- begin                                          libvex.h ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#ifndef __LIBVEX_H
#define __LIBVEX_H

#include "libvex_basictypes.h"
#include "libvex_ir.h"


/*---------------------------------------------------------------*/
/*--- This file defines the top-level interface to LibVEX.    ---*/
/*---------------------------------------------------------------*/

/*-------------------------------------------------------*/
/*--- Architectures, variants, and other arch info    ---*/
/*-------------------------------------------------------*/
typedef
   enum {
      VexArch_INVALID=0x400,
      VexArchX86,
      VexArchAMD64,
      VexArchARM,
      VexArchARM64,
      VexArchPPC32,
      VexArchPPC64,
      VexArchS390X,
      VexArchMIPS32,
      VexArchMIPS64,
      VexArchNANOMIPS,
   }
   VexArch;


/* Information about endianness. */
typedef
   enum {
      VexEndness_INVALID=0x600, /* unknown endianness */
      VexEndnessLE,             /* little endian */
      VexEndnessBE              /* big endian */
   }
   VexEndness;
/* For a given architecture, these specify extra capabilities beyond
   the minimum supported (baseline) capabilities.  They may be OR'd
   together, although some combinations don't make sense (eg, SSE2
   but not SSE1).  LibVEX_Translate will check for nonsensical
   combinations. */

/* x86: baseline capability is Pentium-1 (FPU, MMX, but no SSE), with
   cmpxchg8b.  MMXEXT is a special AMD-only subset of SSE1 (Integer SSE). */
#define VEX_HWCAPS_X86_MMXEXT  (1<<1)  /* A subset of SSE1 on early AMD */
#define VEX_HWCAPS_X86_SSE1    (1<<2)  /* SSE1 support (Pentium III) */
#define VEX_HWCAPS_X86_SSE2    (1<<3)  /* SSE2 support (Pentium 4) */
#define VEX_HWCAPS_X86_SSE3    (1<<4)  /* SSE3 support (>= Prescott) */
#define VEX_HWCAPS_X86_LZCNT   (1<<5)  /* SSE4a LZCNT insn */

/* amd64: baseline capability is SSE2, with cmpxchg8b but not
   cmpxchg16b. */
#define VEX_HWCAPS_AMD64_SSE3   (1<<5)  /* SSE3 support */
#define VEX_HWCAPS_AMD64_SSSE3  (1<<12) /* Supplemental SSE3 support */
#define VEX_HWCAPS_AMD64_CX16   (1<<6)  /* cmpxchg16b support */
#define VEX_HWCAPS_AMD64_LZCNT  (1<<7)  /* SSE4a LZCNT insn */
#define VEX_HWCAPS_AMD64_AVX    (1<<8)  /* AVX instructions */
#define VEX_HWCAPS_AMD64_RDTSCP (1<<9)  /* RDTSCP instruction */
#define VEX_HWCAPS_AMD64_BMI    (1<<10) /* BMI1 instructions */
#define VEX_HWCAPS_AMD64_AVX2   (1<<11) /* AVX2 instructions */
#define VEX_HWCAPS_AMD64_RDRAND (1<<13) /* RDRAND instructions */
#define VEX_HWCAPS_AMD64_F16C   (1<<14) /* F16C instructions */
#define VEX_HWCAPS_AMD64_RDSEED (1<<15) /* RDSEED instructions */
#define VEX_HWCAPS_AMD64_FMA3   (1<<16) /* FMA3 instructions */
#define VEX_HWCAPS_AMD64_FMA4   (1<<17) /* FMA4 instructions */
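
/* Example: hwcaps values are OR'd together to describe a host.  An
   illustrative sketch only -- the combination below is made up, not a
   statement about any particular CPU:

      UInt hwcaps = VEX_HWCAPS_AMD64_SSE3
                    | VEX_HWCAPS_AMD64_SSSE3
                    | VEX_HWCAPS_AMD64_CX16
                    | VEX_HWCAPS_AMD64_AVX;

   As noted above, LibVEX_Translate checks for nonsensical
   combinations (eg, SSE2 but not SSE1 on x86). */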
/* ppc32: baseline capability is integer only */
#define VEX_HWCAPS_PPC32_F     (1<<8)  /* basic (non-optional) FP */
#define VEX_HWCAPS_PPC32_V     (1<<9)  /* Altivec (VMX) */
#define VEX_HWCAPS_PPC32_FX    (1<<10) /* FP extns (fsqrt, fsqrts) */
#define VEX_HWCAPS_PPC32_GX    (1<<11) /* Graphics extns
                                          (fres,frsqrte,fsel,stfiwx) */
#define VEX_HWCAPS_PPC32_VX    (1<<12) /* Vector-scalar floating-point
                                          (VSX); implies ISA 2.06 or
                                          higher */
#define VEX_HWCAPS_PPC32_DFP   (1<<17) /* Decimal Floating Point (DFP)
                                          -- e.g., dadd */
#define VEX_HWCAPS_PPC32_ISA2_07 (1<<19) /* ISA 2.07 -- e.g., mtvsrd */
#define VEX_HWCAPS_PPC32_ISA3_0  (1<<21) /* ISA 3.0  -- e.g., cnttzw */
#define VEX_HWCAPS_PPC32_ISA3_1  (1<<22) /* ISA 3.1  -- e.g., brh */
/* ISA 3.1 is not supported in 32-bit mode */

/* ppc64: baseline capability is integer and basic FP insns */
#define VEX_HWCAPS_PPC64_V     (1<<13) /* Altivec (VMX) */
#define VEX_HWCAPS_PPC64_FX    (1<<14) /* FP extns (fsqrt, fsqrts) */
#define VEX_HWCAPS_PPC64_GX    (1<<15) /* Graphics extns
                                          (fres,frsqrte,fsel,stfiwx) */
#define VEX_HWCAPS_PPC64_VX    (1<<16) /* Vector-scalar floating-point
                                          (VSX); implies ISA 2.06 or
                                          higher */
#define VEX_HWCAPS_PPC64_DFP   (1<<18) /* Decimal Floating Point (DFP)
                                          -- e.g., dadd */
#define VEX_HWCAPS_PPC64_ISA2_07 (1<<20) /* ISA 2.07 -- e.g., mtvsrd */
#define VEX_HWCAPS_PPC64_ISA3_0  (1<<22) /* ISA 3.0  -- e.g., cnttzw */
#define VEX_HWCAPS_PPC64_ISA3_1  (1<<23) /* ISA 3.1  -- e.g., brh */
#define VEX_HWCAPS_PPC64_SCV     (1<<24) /* ISA 3.0; kernel supports the
                                            scv instruction */

/* s390x: Hardware capability encoding

   Bits [26:31] encode the machine model (see VEX_S390X_MODEL... below)
   Bits [0:20]  encode specific hardware capabilities
                (see VEX_HWCAPS_S390X_... below)
*/

/* Model numbers must be assigned in chronological order.
   They are used as array index. */
#define VEX_S390X_MODEL_Z900     0
#define VEX_S390X_MODEL_Z800     1
#define VEX_S390X_MODEL_Z990     2
#define VEX_S390X_MODEL_Z890     3
#define VEX_S390X_MODEL_Z9_EC    4
#define VEX_S390X_MODEL_Z9_BC    5
#define VEX_S390X_MODEL_Z10_EC   6
#define VEX_S390X_MODEL_Z10_BC   7
#define VEX_S390X_MODEL_Z196     8
#define VEX_S390X_MODEL_Z114     9
#define VEX_S390X_MODEL_ZEC12    10
#define VEX_S390X_MODEL_ZBC12    11
#define VEX_S390X_MODEL_Z13      12
#define VEX_S390X_MODEL_Z13S     13
#define VEX_S390X_MODEL_Z14      14
#define VEX_S390X_MODEL_Z14_ZR1  15
#define VEX_S390X_MODEL_Z15      16
#define VEX_S390X_MODEL_Z16      17
#define VEX_S390X_MODEL_UNKNOWN  18  /* always last in list */
#define VEX_S390X_MODEL_MASK     0x3F

#define VEX_HWCAPS_S390X_LDISP (1<<6)  /* Long-displacement facility */
#define VEX_HWCAPS_S390X_EIMM  (1<<7)  /* Extended-immediate facility */
#define VEX_HWCAPS_S390X_GIE   (1<<8)  /* General-instruction-extension facility */
#define VEX_HWCAPS_S390X_DFP   (1<<9)  /* Decimal floating point facility */
#define VEX_HWCAPS_S390X_FGX   (1<<10) /* FPR-GR transfer facility */
#define VEX_HWCAPS_S390X_ETF2  (1<<11) /* ETF2-enhancement facility */
#define VEX_HWCAPS_S390X_STFLE (1<<12) /* STFLE facility */
#define VEX_HWCAPS_S390X_ETF3  (1<<13) /* ETF3-enhancement facility */
#define VEX_HWCAPS_S390X_STCKF (1<<14) /* STCKF facility */
#define VEX_HWCAPS_S390X_FPEXT (1<<15) /* Floating-point extension facility */
#define VEX_HWCAPS_S390X_LSC   (1<<16) /* Conditional load/store facility */
#define VEX_HWCAPS_S390X_PFPO  (1<<17) /* Perform-floating-point-operation facility */
#define VEX_HWCAPS_S390X_VX    (1<<18) /* Vector facility */
#define VEX_HWCAPS_S390X_MSA5  (1<<19) /* Message-security-assist facility 5 */
#define VEX_HWCAPS_S390X_MI2   (1<<20) /* Miscellaneous-instruction-extensions facility 2 */
#define VEX_HWCAPS_S390X_LSC2  (1<<21) /* Conditional load/store facility 2 */
#define VEX_HWCAPS_S390X_VXE   (1<<22) /* Vector-enhancements facility */
#define VEX_HWCAPS_S390X_NNPA  (1<<23) /* NNPA facility */
#define VEX_HWCAPS_S390X_DFLT  (1<<24) /* Deflate-conversion facility */

/* Special value representing all available s390x hwcaps */
#define VEX_HWCAPS_S390X_ALL   (VEX_HWCAPS_S390X_LDISP | \
                                VEX_HWCAPS_S390X_EIMM  | \
                                VEX_HWCAPS_S390X_GIE   | \
                                VEX_HWCAPS_S390X_DFP   | \
                                VEX_HWCAPS_S390X_FGX   | \
                                VEX_HWCAPS_S390X_STFLE | \
                                VEX_HWCAPS_S390X_STCKF | \
                                VEX_HWCAPS_S390X_FPEXT | \
                                VEX_HWCAPS_S390X_LSC   | \
                                VEX_HWCAPS_S390X_ETF3  | \
                                VEX_HWCAPS_S390X_ETF2  | \
                                VEX_HWCAPS_S390X_PFPO  | \
                                VEX_HWCAPS_S390X_VX    | \
                                VEX_HWCAPS_S390X_MSA5  | \
                                VEX_HWCAPS_S390X_MI2   | \
                                VEX_HWCAPS_S390X_LSC2  | \
                                VEX_HWCAPS_S390X_VXE   | \
                                VEX_HWCAPS_S390X_NNPA  | \
                                VEX_HWCAPS_S390X_DFLT)

#define VEX_HWCAPS_S390X(x)  ((x) & ~VEX_S390X_MODEL_MASK)
#define VEX_S390X_MODEL(x)   ((x) &  VEX_S390X_MODEL_MASK)
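
/* Example: the s390x hwcaps word packs the model number into the bits
   covered by VEX_S390X_MODEL_MASK and the facility flags above them.
   An illustrative sketch of composing and decomposing such a word
   (the chosen model and facilities are arbitrary):

      UInt hw = VEX_S390X_MODEL_Z13
                | VEX_HWCAPS_S390X_LDISP
                | VEX_HWCAPS_S390X_STFLE;

      UInt model = VEX_S390X_MODEL(hw);   // == VEX_S390X_MODEL_Z13
      UInt caps  = VEX_HWCAPS_S390X(hw);  // facility flags, model masked off
*/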
/* arm: baseline capability is ARMv4 */
/* Bits 5:0 - architecture level (e.g. 5 for v5, 6 for v6 etc) */
#define VEX_HWCAPS_ARM_VFP    (1<<6)  /* VFP extension */
#define VEX_HWCAPS_ARM_VFP2   (1<<7)  /* VFPv2 */
#define VEX_HWCAPS_ARM_VFP3   (1<<8)  /* VFPv3 */
/* Bits 15:10 reserved for (possible) future VFP revisions */
#define VEX_HWCAPS_ARM_NEON   (1<<16) /* Advanced SIMD, also known as NEON */

/* Get an ARM architecture level from HWCAPS */
#define VEX_ARM_ARCHLEVEL(x) ((x) & 0x3f)

/* ARM64: baseline capability is AArch64 v8. */
#define VEX_HWCAPS_ARM64_FHM      (1 << 4)
#define VEX_HWCAPS_ARM64_DPBCVAP  (1 << 5)
#define VEX_HWCAPS_ARM64_DPBCVADP (1 << 6)
#define VEX_HWCAPS_ARM64_SM3      (1 << 7)
#define VEX_HWCAPS_ARM64_SM4      (1 << 8)
#define VEX_HWCAPS_ARM64_SHA3     (1 << 9)
#define VEX_HWCAPS_ARM64_RDM      (1 << 10)
#define VEX_HWCAPS_ARM64_ATOMICS  (1 << 11)
#define VEX_HWCAPS_ARM64_I8MM     (1 << 12)
#define VEX_HWCAPS_ARM64_BF16     (1 << 13)
#define VEX_HWCAPS_ARM64_FP16     (1 << 14)
#define VEX_HWCAPS_ARM64_VFP16    (1 << 15)

/* MIPS baseline capability */
/* Assigned Company values for bits 23:16 of the PRId Register
   (CP0 register 15, select 0).  As of the MIPS32 and MIPS64 specs from
   MTI, the PRId register is defined in this (backwards compatible)
   way:

  +----------------+----------------+----------------+----------------+
  | Company Options| Company ID     | Processor ID   | Revision       |
  +----------------+----------------+----------------+----------------+
   31            24 23            16 15             8 7              0
*/
#define VEX_PRID_COMP_LEGACY     0x00000000
#define VEX_PRID_COMP_MIPS       0x00010000
#define VEX_PRID_COMP_BROADCOM   0x00020000
#define VEX_PRID_COMP_NETLOGIC   0x000C0000
#define VEX_PRID_COMP_CAVIUM     0x000D0000
#define VEX_PRID_COMP_INGENIC_E1 0x00E10000 /* JZ4780 */

/*
 * These are valid when 23:16 == PRID_COMP_LEGACY
 */
#define VEX_PRID_IMP_LOONGSON_64 0x6300  /* Loongson-2/3 */

/*
 * These are the PRIDs for when 23:16 == PRID_COMP_MIPS
 */
#define VEX_PRID_IMP_34K   0x9500
#define VEX_PRID_IMP_74K   0x9700
#define VEX_PRID_IMP_P5600 0xa800

/*
 * Instead of Company Options values, bits 31:24 will be packed with
 * additional information, such as ISA level and FP mode.
 */
#define VEX_MIPS_CPU_ISA_M32R1 0x01000000
#define VEX_MIPS_CPU_ISA_M32R2 0x02000000
#define VEX_MIPS_CPU_ISA_M64R1 0x04000000
#define VEX_MIPS_CPU_ISA_M64R2 0x08000000
#define VEX_MIPS_CPU_ISA_M32R6 0x10000000
#define VEX_MIPS_CPU_ISA_M64R6 0x20000000
/* FP mode is FR = 1 (32 dbl. prec. FP registers) */
#define VEX_MIPS_HOST_FR 0x40000000
/* Get MIPS Extended Information */
#define VEX_MIPS_EX_INFO(x) ((x) & 0xFF000000)
/* Get MIPS Company ID from HWCAPS */
#define VEX_MIPS_COMP_ID(x) ((x) & 0x00FF0000)
/* Get MIPS Processor ID from HWCAPS */
#define VEX_MIPS_PROC_ID(x) ((x) & 0x0000FF00)
/* Get MIPS Revision from HWCAPS */
#define VEX_MIPS_REV(x) ((x) & 0x000000FF)
/* Get host FP mode */
#define VEX_MIPS_HOST_FP_MODE(x) (!!(VEX_MIPS_EX_INFO(x) & VEX_MIPS_HOST_FR))
/* Check if the processor supports MIPS32R2. */
#define VEX_MIPS_CPU_HAS_MIPS32R2(x) (VEX_MIPS_EX_INFO(x) & \
                                      VEX_MIPS_CPU_ISA_M32R2)
/* Check if the processor supports MIPS64R2. */
#define VEX_MIPS_CPU_HAS_MIPS64R2(x) (VEX_MIPS_EX_INFO(x) & \
                                      VEX_MIPS_CPU_ISA_M64R2)
/* Check if the processor supports MIPSR6. */
#define VEX_MIPS_CPU_HAS_MIPSR6(x) (VEX_MIPS_EX_INFO(x) & \
                                    (VEX_MIPS_CPU_ISA_M32R6 | \
                                     VEX_MIPS_CPU_ISA_M64R6))
/* Check if the processor supports DSP ASE Rev 2. */
#define VEX_MIPS_PROC_DSP2(x) ((VEX_MIPS_COMP_ID(x) == VEX_PRID_COMP_MIPS) && \
                               (VEX_MIPS_PROC_ID(x) == VEX_PRID_IMP_74K))
/* Check if the processor supports DSP ASE Rev 1. */
#define VEX_MIPS_PROC_DSP(x)  (VEX_MIPS_PROC_DSP2(x) || \
                               ((VEX_MIPS_COMP_ID(x) == VEX_PRID_COMP_MIPS) && \
                                (VEX_MIPS_PROC_ID(x) == VEX_PRID_IMP_34K)))

/* Check if the processor supports MIPS MSA (SIMD). */
#define VEX_MIPS_PROC_MSA(x) ((VEX_MIPS_COMP_ID(x) == VEX_PRID_COMP_MIPS) && \
                              (VEX_MIPS_PROC_ID(x) == VEX_PRID_IMP_P5600) && \
                              (VEX_MIPS_HOST_FP_MODE(x)))
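
/* Example: an illustrative sketch of decoding a MIPS hwcaps word with
   the accessors above (the value is made up for illustration):

      UInt hw = VEX_MIPS_CPU_ISA_M32R2 | VEX_MIPS_HOST_FR
                | VEX_PRID_COMP_MIPS   | VEX_PRID_IMP_74K;

      Bool dsp2 = VEX_MIPS_PROC_DSP2(hw);              // True: a 74K from MTI
      Bool r2   = VEX_MIPS_CPU_HAS_MIPS32R2(hw) != 0;  // ISA bit is set
      UInt fr   = VEX_MIPS_HOST_FP_MODE(hw);           // 1: FR=1 FP mode
*/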
/* These return statically allocated strings. */

extern const HChar* LibVEX_ppVexArch    ( VexArch );
extern const HChar* LibVEX_ppVexEndness ( VexEndness endness );
extern const HChar* LibVEX_ppVexHwCaps  ( VexArch, UInt );


/* The various kinds of caches */
typedef enum {
   DATA_CACHE=0x500,
   INSN_CACHE,
   UNIFIED_CACHE
} VexCacheKind;

/* Information about a particular cache */
typedef struct {
   VexCacheKind kind;
   UInt level;            /* level this cache is at, e.g. 1 for L1 cache */
   UInt sizeB;            /* size of this cache in bytes */
   UInt line_sizeB;       /* cache line size in bytes */
   UInt assoc;            /* set associativity */
   Bool is_trace_cache;   /* False, except for certain Pentium 4 models */
} VexCache;

/* Convenience macro to initialise a VexCache */
#define VEX_CACHE_INIT(_kind, _level, _size, _line_size, _assoc)         \
         ({ (VexCache) { .kind = _kind, .level = _level, .sizeB = _size, \
               .line_sizeB = _line_size, .assoc = _assoc,                \
               .is_trace_cache = False }; })

/* Information about the cache system as a whole */
typedef struct {
   UInt num_levels;
   UInt num_caches;
   /* Unordered array of caches for this host.  NULL if there are
      no caches.  The following can always be assumed:
      (1) There is at most one cache of a given kind per cache level.
      (2) If there exists a unified cache at a particular level then
          no other cache exists at that level.
      (3) The existence of a cache at level N > 1 implies the existence of
          at least one cache at level N-1. */
   VexCache *caches;
   Bool icaches_maintain_coherence;
} VexCacheInfo;
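
/* Example: an illustrative sketch of describing a two-level hierarchy
   with the types above (sizes and associativities are made up; real
   values come from querying the host):

      static VexCache caches[3];
      caches[0] = VEX_CACHE_INIT(DATA_CACHE,    1, 32*1024,   64, 8);
      caches[1] = VEX_CACHE_INIT(INSN_CACHE,    1, 32*1024,   64, 8);
      caches[2] = VEX_CACHE_INIT(UNIFIED_CACHE, 2, 1024*1024, 64, 16);

      VexCacheInfo ci;
      ci.num_levels = 2;
      ci.num_caches = 3;
      ci.caches     = caches;
      ci.icaches_maintain_coherence = True;

   This respects the invariants above: one cache of each kind per
   level, the unified cache stands alone at level 2, and a level-2
   cache implies a level-1 cache. */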
/* This struct is a bit of a hack, but is needed to carry misc
   important bits of info about an arch.  Fields which are meaningless
   or ignored for the platform in question should be set to zero.
   Nb: if you add fields to the struct make sure to update function
   LibVEX_default_VexArchInfo. */

typedef
   struct {
      /* The following three fields are mandatory. */
      UInt         hwcaps;
      VexEndness   endness;
      VexCacheInfo hwcache_info;
      /* PPC32/PPC64 only: size of instruction cache line */
      Int ppc_icache_line_szB;
      /* PPC32/PPC64 only: sizes zeroed by the dcbz/dcbzl instructions
         (bug#135264) */
      UInt ppc_dcbz_szB;
      /* PPC32/PPC64 only: True if scv is supported */
      Bool ppc_scv_supported;
      UInt ppc_dcbzl_szB; /* 0 means unsupported (SIGILL) */
      /* ARM64: I- and D- minimum line sizes in log2(bytes), as
         obtained from ctr_el0.DminLine and .IminLine.  For example, a
         line size of 64 bytes would be encoded here as 6. */
      UInt arm64_dMinLine_lg2_szB;
      UInt arm64_iMinLine_lg2_szB;
      UChar arm64_cache_block_size;
      /* ARM64: does the host require us to use the fallback LLSC
         implementation? */
      Bool arm64_requires_fallback_LLSC;
   }
   VexArchInfo;

/* Write default settings into *vai. */
extern
void LibVEX_default_VexArchInfo ( /*OUT*/VexArchInfo* vai );
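
/* Example: the intended initialisation pattern -- take the defaults,
   then fill in the three mandatory fields.  An illustrative sketch
   (the hwcaps chosen are arbitrary):

      VexArchInfo vai;
      LibVEX_default_VexArchInfo(&vai);
      vai.hwcaps  = VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_CX16;
      vai.endness = VexEndnessLE;
      // hwcache_info and the per-arch fields may stay at their
      // defaults where they are meaningless for the platform
*/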
/* This struct carries guest and host ABI variant information that may
   be needed.  Fields which are meaningless or ignored for the
   platform in question should be set to zero.

   Settings which are believed to be correct are:

   guest_stack_redzone_size
      guest is ppc32-linux   ==> 0
      guest is ppc64-linux   ==> 288
      guest is amd64-linux   ==> 128
      guest is other         ==> inapplicable

   guest_amd64_assume_fs_is_const
      guest is amd64-linux   ==> True
      guest is amd64-darwin  ==> False
      guest is amd64-solaris ==> True
      guest is other         ==> inapplicable

   guest_amd64_assume_gs_is_const
      guest is amd64-darwin  ==> True
      guest is amd64-linux   ==> True
      guest is amd64-solaris ==> False
      guest is other         ==> inapplicable

   guest_ppc_zap_RZ_at_blr
      guest is ppc64-linux   ==> True
      guest is ppc32-linux   ==> False
      guest is other         ==> inapplicable

   guest_ppc_zap_RZ_at_bl
      guest is ppc64-linux   ==> const True
      guest is ppc32-linux   ==> const False
      guest is other         ==> inapplicable

   guest__use_fallback_LLSC
      guest is mips32        ==> applicable, default True
      guest is mips64        ==> applicable, default True
      guest is arm64         ==> applicable, default False

   host_ppc_calls_use_fndescrs:
      host is ppc32-linux    ==> False
      host is ppc64-linux    ==> True
      host is other          ==> inapplicable
*/

typedef
   struct {
      /* PPC and AMD64 GUESTS only: how many bytes below the
         stack pointer are validly addressable? */
      Int guest_stack_redzone_size;

      /* AMD64 GUESTS only: should we translate %fs-prefixed
         instructions using the assumption that %fs always contains
         the same value? (typically zero on linux and solaris) */
      Bool guest_amd64_assume_fs_is_const;

      /* AMD64 GUESTS only: should we translate %gs-prefixed
         instructions using the assumption that %gs always contains
         the same value? (typically 0x60 on darwin) */
      Bool guest_amd64_assume_gs_is_const;

      /* AMD64 GUESTS only: for a misaligned memory access for which we
         should generate a trap, should we generate SIGBUS (a la
         FreeBSD) or SIGSEGV (Linux, OSX)? */
      Bool guest_amd64_sigbus_on_misalign;

      /* PPC GUESTS only: should we zap the stack red zone at a 'blr'
         (function return)? */
      Bool guest_ppc_zap_RZ_at_blr;

      /* PPC GUESTS only: should we zap the stack red zone at a 'bl'
         (function call)?  Is supplied with the guest address of the
         target of the call since that may be significant.  If NULL,
         is assumed equivalent to a fn which always returns False. */
      Bool (*guest_ppc_zap_RZ_at_bl)(Addr);

      /* Potentially for all guests that use LL/SC: use the fallback
         (synthesised) implementation rather than passing LL/SC on to
         the host? */
      Bool guest__use_fallback_LLSC;

      /* PPC32/PPC64 HOSTS only: does '&f' give us a pointer to a
         function descriptor on the host, or to the function code
         itself?  True => descriptor, False => code. */
      Bool host_ppc_calls_use_fndescrs;

      /* MIPS32/MIPS64 GUESTS only: emulated FPU mode. */
      UInt guest_mips_fp_mode;
   }
   VexAbiInfo;

/* Write default settings into *vbi. */
extern
void LibVEX_default_VexAbiInfo ( /*OUT*/VexAbiInfo* vbi );
/*-------------------------------------------------------*/
/*--- Control of Vex's optimiser (iropt).             ---*/
/*-------------------------------------------------------*/


/* VexRegisterUpdates specifies when to ensure that the guest state is
   up to date, in order of increasing accuracy but increasing expense.

     VexRegUpdSpAtMemAccess: all registers are updated at superblock
     exits, and SP is also up to date at memory exception points.  The
     SP is described by the arch specific functions
     guest_<arch>_state_requires_precise_mem_exns.

     VexRegUpdUnwindregsAtMemAccess: registers needed to make a stack
     trace are up to date at memory exception points.  Typically,
     these are PC/SP/FP.  The minimal registers are described by the
     arch specific functions guest_<arch>_state_requires_precise_mem_exns.
     This is what Valgrind sets as the default.

     VexRegUpdAllregsAtMemAccess: all registers up to date at memory
     exception points.  This is what normally might be considered as
     providing "precise exceptions for memory", but does not
     necessarily provide precise register values at any other kind of
     exception.

     VexRegUpdAllregsAtEachInsn: all registers up to date at each
     instruction.
*/
typedef
   enum {
      VexRegUpd_INVALID=0x700,
      VexRegUpdSpAtMemAccess,
      VexRegUpdUnwindregsAtMemAccess,
      VexRegUpdAllregsAtMemAccess,
      VexRegUpdAllregsAtEachInsn
   }
   VexRegisterUpdates;

/* Control of Vex's optimiser. */

typedef
   struct {
      /* Controls verbosity of iropt.  0 = no output. */
      Int iropt_verbosity;
      /* Control aggressiveness of iropt.  0 = no opt, 1 = simple
         opts, 2 (default) = max optimisation. */
      Int iropt_level;
      /* Controls when registers are updated in guest state.  Note
         that this is the default value.  The VEX client can override
         this on a per-IRSB basis if it wants.  bb_to_IR() will query
         the client to ask if it wants a different setting for the
         block under construction, and that new setting is transported
         back to LibVEX_Translate, which feeds it to iropt via the
         various do_iropt_BB calls. */
      VexRegisterUpdates iropt_register_updates_default;
      /* How aggressive should iropt be in unrolling loops?  Higher
         numbers make it more enthusiastic about loop unrolling.
         Default=120.  A setting of zero disables unrolling. */
      Int iropt_unroll_thresh;
      /* What's the maximum basic block length the front end(s) allow?
         BBs longer than this are split up.  Default=60 (guest
         insns). */
      Int guest_max_insns;
      /* Should Vex try to construct superblocks, by chasing unconditional
         branches/calls to known destinations, and performing AND/OR idiom
         recognition?  It is recommended to set this to True, as that
         possibly improves performance a bit, and also is important for
         avoiding certain kinds of false positives in Memcheck.
         Default=True. */
      Bool guest_chase;
      /* Register allocator version.  Allowed values are:
         - '2': previous, good and slow implementation.
         - '3': current, faster implementation; perhaps producing slightly
                worse spilling decisions. */
      UInt regalloc_version;
   }
   VexControl;


/* Write the default settings into *vcon. */

extern
void LibVEX_default_VexControl ( /*OUT*/ VexControl* vcon );
/*-------------------------------------------------------*/
/*--- Storage management control                      ---*/
/*-------------------------------------------------------*/

/* Allocate in Vex's temporary allocation area.  Be careful with this.
   You can only call it inside an instrumentation or optimisation
   callback that you have previously specified in a call to
   LibVEX_Translate.  The storage allocated will only stay alive until
   translation of the current basic block is complete. */
extern void* LibVEX_Alloc ( SizeT nbytes );

/* Show Vex allocation statistics. */
extern void LibVEX_ShowAllocStats ( void );


/*-------------------------------------------------------*/
/*--- Describing guest state layout                   ---*/
/*-------------------------------------------------------*/

/* Describe the guest state enough that the instrumentation
   functions can work. */

/* The max number of guest state chunks which we can describe as
   always defined (for the benefit of Memcheck). */
#define VEXGLO_N_ALWAYSDEFD  24

typedef
   struct {
      /* Total size of the guest state, in bytes.  Must be
         16-aligned. */
      Int total_sizeB;
      /* Whereabouts is the stack pointer? */
      Int offset_SP;
      Int sizeof_SP; /* 4 or 8 */
      /* Whereabouts is the frame pointer? */
      Int offset_FP;
      Int sizeof_FP; /* 4 or 8 */
      /* Whereabouts is the instruction pointer? */
      Int offset_IP;
      Int sizeof_IP; /* 4 or 8 */
      /* Describe parts of the guest state regarded as 'always
         defined'. */
      Int n_alwaysDefd;
      struct {
         Int offset;
         Int size;
      } alwaysDefd[VEXGLO_N_ALWAYSDEFD];
   }
   VexGuestLayout;
/* A note about guest state layout.

   LibVEX defines the layout for the guest state, in the file
   pub/libvex_guest_<arch>.h.  The struct will have a 16-aligned
   size.  Each translated bb is assumed to be entered with a specified
   register pointing at such a struct.  Beyond that are two copies of
   the shadow state area with the same size as the struct.  Beyond
   that is a spill area that LibVEX may spill into.  It must have size
   LibVEX_N_SPILL_BYTES, and this must be a 16-aligned number.

   On entry, the baseblock pointer register must be 16-aligned.

   There must be no holes in between the primary guest state, its two
   copies, and the spill area.  In short, all 4 areas must have a
   16-aligned size and be 16-aligned, and placed back-to-back.
*/

#define LibVEX_N_SPILL_BYTES 4096

/* The size of the guest state must be a multiple of this number. */
#define LibVEX_GUEST_STATE_ALIGN 16
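
/* Worked example of the layout above: for a guest state of size S
   (a multiple of LibVEX_GUEST_STATE_ALIGN), the baseblock is

      S bytes of primary guest state,
      followed by 2*S bytes for the two shadow copies,
      followed by LibVEX_N_SPILL_BYTES bytes of spill area,

   back-to-back with no holes, for 3*S + LibVEX_N_SPILL_BYTES bytes in
   total, the whole area being 16-aligned. */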
/*-------------------------------------------------------*/
/*--- Initialisation of the library                   ---*/
/*-------------------------------------------------------*/

/* Initialise the library.  You must call this first. */

extern void LibVEX_Init (

   /* failure exit function */
#  if defined(__cplusplus) && defined(__GNUC__) && __GNUC__ <= 3
   /* g++ 3.x doesn't understand attributes on function parameters.
      See #265762. */
#  else
   __attribute__ ((noreturn))
#  endif
   void (*failure_exit) ( void ),

   /* logging output function */
   void (*log_bytes) ( const HChar*, SizeT nbytes ),

   /* debug paranoia level */
   Int debuglevel,

   /* Control ... */
   const VexControl* vcon
);
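
/* Example: an illustrative start-up sketch.  The two callbacks,
   my_failure_exit and my_log_bytes, are hypothetical names for
   client-supplied functions (and assume <stdio.h> and <stdlib.h>):

      __attribute__((noreturn))
      static void my_failure_exit ( void ) { exit(1); }

      static void my_log_bytes ( const HChar* bytes, SizeT nbytes )
      {  fwrite(bytes, 1, nbytes, stderr);  }

      VexControl vcon;
      LibVEX_default_VexControl(&vcon);

      LibVEX_Init(my_failure_exit, my_log_bytes,
                  0/*debuglevel*/, &vcon);
*/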
/*-------------------------------------------------------*/
/*--- Make a translation                              ---*/
/*-------------------------------------------------------*/

/* Describes the outcome of a translation attempt. */
typedef
   struct {
      /* overall status */
      enum { VexTransOK=0x800,
             VexTransAccessFail, VexTransOutputFull } status;
      /* The number of extents that have a self-check (0 to 3) */
      UInt n_sc_extents;
      /* Offset in generated code of the profile inc, or -1 if
         none.  Needed for later patching. */
      Int offs_profInc;
      /* Stats only: the number of guest insns included in the
         translation.  It may be zero (!). */
      UInt n_guest_instrs;
      /* Stats only: the number of unconditional branches incorporated
         into the trace. */
      UShort n_uncond_in_trace;
      /* Stats only: the number of conditional branches incorporated
         into the trace. */
      UShort n_cond_in_trace;
   }
   VexTranslateResult;


/* Describes precisely the pieces of guest code that a translation
   covers.  Now that Vex can chase across BB boundaries, the old
   scheme of describing a chunk of guest code merely by its start
   address and length is inadequate.

   This struct uses 20 bytes on a 32-bit architecture and 32 bytes on a
   64-bit architecture.  Space is important as clients will have to
   store one of these for each translation made.
*/
typedef
   struct {
      Addr   base[3];
      UShort len[3];
      UShort n_used;
   }
   VexGuestExtents;
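
/* Example: an illustrative sketch of walking a VexGuestExtents to
   find the total number of guest bytes a translation covers:

      static UInt total_guest_bytes ( const VexGuestExtents* vge )
      {
         UInt i, sum = 0;
         for (i = 0; i < vge->n_used; i++)   // n_used is at most 3
            sum += vge->len[i];
         return sum;
      }
*/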
/* A structure to carry arguments for LibVEX_Translate.  There are so
   many of them, it seems better to have a structure. */
typedef
   struct {
      /* IN: The instruction sets we are translating from and to.  And
         guest/host misc info. */
      VexArch      arch_guest;
      VexArchInfo  archinfo_guest;
      VexArch      arch_host;
      VexArchInfo  archinfo_host;
      VexAbiInfo   abiinfo_both;

      /* IN: an opaque value which is passed as the first arg to all
         callback functions supplied in this struct.  Vex has no idea
         what's at the other end of this pointer. */
      void* callback_opaque;

      /* IN: the block to translate, and its guest address. */
      /* where are the actual bytes in the host's address space? */
      const UChar* guest_bytes;
      /* where do the bytes really come from in the guest's aspace?
         This is the post-redirection guest address.  Not that Vex
         understands anything about redirection; that is all done on
         the Valgrind side. */
      Addr guest_bytes_addr;

      /* Is it OK to chase into this guest address?  May not be
         NULL. */
      Bool (*chase_into_ok) ( /*callback_opaque*/void*, Addr );

      /* OUT: which bits of guest code actually got translated */
      VexGuestExtents* guest_extents;

      /* IN: a place to put the resulting code, and its size */
      UChar* host_bytes;
      Int    host_bytes_size;
      /* OUT: how much of the output area is used. */
      Int*   host_bytes_used;

      /* IN: optionally, two instrumentation functions.  May be
         NULL. */
      IRSB* (*instrument1) ( /*callback_opaque*/void*,
                             IRSB*,
                             const VexGuestLayout*,
                             const VexGuestExtents*,
                             const VexArchInfo*,
                             IRType gWordTy, IRType hWordTy );
      IRSB* (*instrument2) ( /*callback_opaque*/void*,
                             IRSB*,
                             const VexGuestLayout*,
                             const VexGuestExtents*,
                             const VexArchInfo*,
                             IRType gWordTy, IRType hWordTy );

      IRSB* (*finaltidy) ( IRSB* );

      /* IN: a callback used to ask the caller which of the extents,
         if any, a self check is required for.  Must not be NULL.
         The returned value is a bitmask with a 1 in position i
         indicating that the i'th extent needs a check.  Since there
         can be at most 3 extents, the returned values must be between
         0 and 7.

         This call also gives the VEX client the opportunity to change
         the precision of register update preservation as performed by
         the IR optimiser.  Before the call, VEX will set *pxControl
         to hold the default register-update status value as specified
         by VexControl::iropt_register_updates_default as passed to
         LibVEX_Init at library initialisation time.  The client (in
         this callback) can, if it wants, inspect the value and change
         it to something different, and that value will be used for
         subsequent IR optimisation of the block. */
      UInt (*needs_self_check)( /*callback_opaque*/void*,
                                /*MAYBE_MOD*/VexRegisterUpdates* pxControl,
                                const VexGuestExtents* );

      /* IN: optionally, a callback which allows the caller to add its
         own IR preamble following the self-check and any other
         VEX-generated preamble, if any.  May be NULL.  If non-NULL,
         the IRSB under construction is handed to this function, which
         presumably adds IR statements to it.  The callback may
         optionally complete the block and direct bb_to_IR not to
         disassemble any instructions into it; this is indicated by
         the callback returning True. */
      Bool (*preamble_function)(/*callback_opaque*/void*, IRSB*);

      /* IN: debug: trace vex activity at various points */
      Int traceflags;

      /* IN: debug: print diagnostics when an illegal instr is detected */
      Bool sigill_diag;

      /* IN: profiling: add a 64 bit profiler counter increment to the
         translation? */
      Bool addProfInc;

      /* IN: address of the dispatcher entry points.  Describes the
         places where generated code should jump to at the end of each
         translation.

         At the end of each translation, the next guest address is
         placed in the host's standard return register (x86: %eax,
         amd64: %rax, ppc32: %r3, ppc64: %r3).  Optionally, the guest
         state pointer register (on host x86: %ebp; amd64: %rbp;
         ppc32/64: r31) may be set to a VEX_TRC_ value to indicate any
         special action required before the next block is run.

         Control is then passed back to the dispatcher (beyond Vex's
         control; caller supplies this) in the following way:

         - On host archs which lack a link register (x86, amd64), by a
           jump to the host address specified in
           'dispatcher_assisted', if the guest state pointer has been
           changed so as to request some action before the next block
           is run, or 'dispatcher_unassisted' (the fast path), in
           which it is assumed that the guest state pointer is
           unchanged and we wish to continue directly with the next
           translation.  Both of these must be non-NULL.

         - On host archs which have a link register (ppc32, ppc64), by
           a branch to the link register (which is guaranteed to be
           unchanged from whatever it was at entry to the
           translation).  'dispatch_assisted' and
           'dispatch_unassisted' must be NULL.

         The aim is to get back and forth between translations and the
         dispatcher without creating memory traffic to store return
         addresses.

         FIXME: update this comment
      */
      const void* disp_cp_chain_me_to_slowEP;
      const void* disp_cp_chain_me_to_fastEP;
      const void* disp_cp_xindir;
      const void* disp_cp_xassisted;
   }
   VexTranslateArgs;


/* Runs the entire compilation pipeline. */
extern
VexTranslateResult LibVEX_Translate ( /*MOD*/ VexTranslateArgs* );
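
/* Example: an illustrative sketch of driving a translation.  The
   guest_code/guest_addr inputs and the four disp_cp_* dispatcher
   addresses are placeholders the client must supply; error handling
   is omitted:

      static Bool chase_ok ( void* opaque, Addr a )
      {  return False;  }   // never chase

      static UInt self_check ( void* opaque,
                               VexRegisterUpdates* pxControl,
                               const VexGuestExtents* vge )
      {  return 0;  }   // bitmask: no extent needs a self-check

      UChar            hbuf[8192];
      Int              hbuf_used = 0;
      VexGuestExtents  vge;
      VexTranslateArgs vta;

      // zero vta, then set arch_guest/arch_host, archinfo_guest/_host,
      // abiinfo_both and the disp_cp_* fields, plus at least:
      vta.guest_bytes      = guest_code;     // client-supplied
      vta.guest_bytes_addr = guest_addr;     // client-supplied
      vta.chase_into_ok    = chase_ok;
      vta.guest_extents    = &vge;
      vta.host_bytes       = hbuf;
      vta.host_bytes_size  = sizeof(hbuf);
      vta.host_bytes_used  = &hbuf_used;
      vta.needs_self_check = self_check;

      VexTranslateResult res = LibVEX_Translate(&vta);
*/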
/* Runs the first half of the compilation pipeline: lifts guest code to
   IR, optimises, instruments and optimises it some more. */
extern
IRSB* LibVEX_FrontEnd ( /*MOD*/ VexTranslateArgs*,
                        /*OUT*/ VexTranslateResult* res,
                        /*OUT*/ VexRegisterUpdates* pxControl );


/* A subtlety re interaction between self-checking translations and
   bb-chasing.  The supplied chase_into_ok function should say NO
   (False) when presented with any address for which you might want to
   make a self-checking translation.

   If it doesn't do that, you may end up with Vex chasing from BB #1
   to BB #2 (fine); but if you wanted checking for #2 and not #1, that
   would not be the result.  Therefore chase_into_ok should disallow
   following into #2.  That will force the caller to eventually
   request a new translation starting at #2, at which point Vex will
   correctly observe the make-a-self-check flag.

   FIXME: is this still up to date? */


/*-------------------------------------------------------*/
/*--- Patch existing translations                     ---*/
/*-------------------------------------------------------*/

/* A host address range that was modified by the functions below.
   Callers must request I-cache syncing after the call as
   appropriate. */
typedef
   struct {
      HWord start;
      HWord len;     /* always > 0 */
   }
   VexInvalRange;
/* Chain an XDirect jump located at place_to_chain so it jumps to
   place_to_jump_to.  It is expected (and checked) that this site
   currently contains a call to the dispatcher specified by
   disp_cp_chain_me_EXPECTED. */
extern
VexInvalRange LibVEX_Chain ( VexArch     arch_host,
                             VexEndness  endness_host,
                             void*       place_to_chain,
                             const void* disp_cp_chain_me_EXPECTED,
                             const void* place_to_jump_to );

/* Undo an XDirect jump located at place_to_unchain, so it is
   converted back into a call to disp_cp_chain_me.  It is expected
   (and checked) that this site currently contains a jump directly to
   the address specified by place_to_jump_to_EXPECTED. */
extern
VexInvalRange LibVEX_UnChain ( VexArch     arch_host,
                               VexEndness  endness_host,
                               void*       place_to_unchain,
                               const void* place_to_jump_to_EXPECTED,
                               const void* disp_cp_chain_me );
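
/* Example: an illustrative sketch of the chain/unchain round trip
   (place_to_chain, disp_cp_chain_me and next_translation are
   placeholders owned by the caller).  Note the returned range: the
   caller must sync the I-cache over it afterwards.

      VexInvalRange vir
         = LibVEX_Chain(VexArchAMD64, VexEndnessLE,
                        place_to_chain,
                        disp_cp_chain_me,    // what the site holds now
                        next_translation);   // where it should jump
      // ... later, to undo it:
      vir = LibVEX_UnChain(VexArchAMD64, VexEndnessLE,
                           place_to_chain,
                           next_translation, // what the site holds now
                           disp_cp_chain_me);
      // then invalidate [vir.start, vir.start + vir.len)
*/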
/* Returns a constant -- the size of the event check that is put at
   the start of every translation.  This makes it possible to
   calculate the fast entry point address if the slow entry point
   address is known (the usual case), or vice versa. */
extern
Int LibVEX_evCheckSzB ( VexArch arch_host );


/* Patch the counter location into an existing ProfInc point.  The
   specified point is checked to make sure it is plausible. */
extern
VexInvalRange LibVEX_PatchProfInc ( VexArch      arch_host,
                                    VexEndness   endness_host,
                                    void*        place_to_patch,
                                    const ULong* location_of_counter );


/*-------------------------------------------------------*/
/*--- Show accumulated statistics                     ---*/
/*-------------------------------------------------------*/

extern void LibVEX_ShowStats ( void );

/*-------------------------------------------------------*/
/*-- IR injection                                      --*/
/*-------------------------------------------------------*/
/* IR Injection Control Block */

#define NO_ROUNDING_MODE (~0u)

typedef
   struct {
      IROp  op;        // the operation to perform
      HWord result;    // address of the result
      HWord opnd1;     // address of 1st operand
      HWord opnd2;     // address of 2nd operand
      HWord opnd3;     // address of 3rd operand
      HWord opnd4;     // address of 4th operand
      IRType t_result; // type of result
      IRType t_opnd1;  // type of 1st operand
      IRType t_opnd2;  // type of 2nd operand
      IRType t_opnd3;  // type of 3rd operand
      IRType t_opnd4;  // type of 4th operand
      UInt  rounding_mode;
      UInt  num_operands;   // excluding rounding mode, if any
      /* The following two members describe whether this operation has
         immediate operands.  There are a few restrictions:
         (1) An operator can have at most one immediate operand.
         (2) If there is an immediate operand, it is the right-most
             operand.
         An immediate_index of 0 means there is no immediate operand. */
      UInt immediate_type;  // size of immediate: Ity_I8, Ity_I16
      UInt immediate_index; // operand number: 1, 2
   }
   IRICB;

extern void LibVEX_InitIRI ( const IRICB* );
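
/* Example: an illustrative sketch of filling an IRICB for a binary
   64-bit add with no rounding mode and no immediate operand (a, b and
   r are hypothetical client-owned buffers):

      ULong a = 2, b = 3, r = 0;

      IRICB iricb = {0};   // zero everything first
      iricb.op              = Iop_Add64;
      iricb.result          = (HWord)&r;
      iricb.opnd1           = (HWord)&a;
      iricb.opnd2           = (HWord)&b;
      iricb.t_result        = Ity_I64;
      iricb.t_opnd1         = Ity_I64;
      iricb.t_opnd2         = Ity_I64;
      iricb.rounding_mode   = NO_ROUNDING_MODE;
      iricb.num_operands    = 2;
      iricb.immediate_index = 0;   // no immediate operand

      LibVEX_InitIRI(&iricb);
*/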
/*-------------------------------------------------------*/
/*--- Notes                                           ---*/
/*-------------------------------------------------------*/

/* Code generation conventions that need to be recorded somewhere.
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   x86
   ~~~
   Generated code should be entered using a JMP instruction.  On
   entry, %ebp should point to the guest state, and %esp should be a
   valid stack pointer.  The generated code may change %eax, %ebx,
   %ecx, %edx, %esi, %edi, all the FP registers and control state, and
   all the XMM registers.

   On entry, the FPU control word should be set to 0x027F, and the SSE
   control word (%mxcsr) should be set to 0x1F80.  On exit, they
   should still have those values (after masking off the lowest 6 bits
   of %mxcsr).  If they don't, there is a bug in VEX-generated code.

   Generated code returns to the scheduler using a JMP instruction, to
   the address specified in the .dispatch field of VexTranslateArgs.
   %eax (or %eax:%edx, if simulating a 64-bit target) will contain the
   guest address of the next block to execute.  %ebp may be changed
   to a VEX_TRC_ value, otherwise it should be as it was at entry.

   CRITICAL ISSUES in x86 code generation.  The only known critical
   issue is that the host FPU and SSE state is not properly saved
   across calls to helper functions.  If any helper references any
   such state, it is likely (1) to misbehave itself, since the FP
   stack tags will not be as expected, and (2) after returning to
   generated code, the generated code is likely to go wrong.  This
   really should be fixed.

   amd64
   ~~~~~
   Analogous to x86.

   ppc32
   ~~~~~
   On entry, the guest state pointer is r31.  .dispatch must be NULL.
   Control is returned with a branch to the link register.  Generated
   code will not change lr.  At return, r3 holds the next guest addr
   (or r3:r4?).  r31 may be changed to a VEX_TRC_ value, otherwise it
   should be as it was at entry.

   ppc64
   ~~~~~
   Same as ppc32.

   arm32
   ~~~~~
   r8 is GSP.

   arm64
   ~~~~~
   r21 is GSP.

   ALL GUEST ARCHITECTURES
   ~~~~~~~~~~~~~~~~~~~~~~~
   The guest state must contain two pseudo-registers, guest_CMSTART
   and guest_CMLEN.  These are used to specify guest address ranges,
   either of code to be invalidated, when used in conjunction with
   Ijk_InvalICache, or of d-cache ranges to be flushed, when used in
   conjunction with Ijk_FlushDCache.  In such cases, the two _CM
   pseudo-regs should be filled in by the IR, and then an exit with
   one of the two abovementioned Ijk_ kinds should happen, so that the
   dispatcher can action them.  Both pseudo-regs must have size equal
   to the guest word size.

   The architecture must have a third pseudo-register, guest_NRADDR,
   also guest-word-sized.  This is used to record the unredirected
   guest address at the start of a translation whose start has been
   redirected.  By reading this pseudo-register shortly afterwards,
   the translation can find out what the corresponding no-redirection
   address was.  Note, this is only set for wrap-style redirects, not
   for replace-style ones. */

#endif /* ndef __LIBVEX_H */

/*---------------------------------------------------------------*/
/*--- libvex.h                                                ---*/
/*---------------------------------------------------------------*/