Expose rdrand and f16c through cpuid also if the host only has avx.
[valgrind.git] / coregrind / m_machine.c
blob 56a28d108d70dcc10b352cfded4b7c334d350ec6
1 /*--------------------------------------------------------------------*/
2 /*--- Machine-related stuff. m_machine.c ---*/
3 /*--------------------------------------------------------------------*/
5 /*
6 This file is part of Valgrind, a dynamic binary instrumentation
7 framework.
9 Copyright (C) 2000-2017 Julian Seward
10 jseward@acm.org
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, see <http://www.gnu.org/licenses/>.
25 The GNU General Public License is contained in the file COPYING.
28 #include "pub_core_basics.h"
29 #include "pub_core_vki.h"
30 #include "pub_core_threadstate.h"
31 #include "pub_core_libcassert.h"
32 #include "pub_core_libcbase.h"
33 #include "pub_core_libcfile.h"
34 #include "pub_core_libcprint.h"
35 #include "pub_core_libcproc.h"
36 #include "pub_core_mallocfree.h"
37 #include "pub_core_machine.h"
38 #include "pub_core_cpuid.h"
39 #include "pub_core_libcsignal.h" // for ppc32 messing with SIGILL and SIGFPE
40 #include "pub_core_debuglog.h"
43 #define INSTR_PTR(regs) ((regs).vex.VG_INSTR_PTR)
44 #define STACK_PTR(regs) ((regs).vex.VG_STACK_PTR)
45 #define FRAME_PTR(regs) ((regs).vex.VG_FRAME_PTR)
47 #define STACK_PTR_S1(regs) ((regs).vex_shadow1.VG_STACK_PTR)
49 Addr VG_(get_IP) ( ThreadId tid ) {
50 return INSTR_PTR( VG_(threads)[tid].arch );
52 Addr VG_(get_SP) ( ThreadId tid ) {
53 return STACK_PTR( VG_(threads)[tid].arch );
55 Addr VG_(get_FP) ( ThreadId tid ) {
56 return FRAME_PTR( VG_(threads)[tid].arch );
59 Addr VG_(get_SP_s1) ( ThreadId tid ) {
60 return STACK_PTR_S1( VG_(threads)[tid].arch );
62 void VG_(set_SP_s1) ( ThreadId tid, Addr sp ) {
63 STACK_PTR_S1( VG_(threads)[tid].arch ) = sp;
66 void VG_(set_IP) ( ThreadId tid, Addr ip ) {
67 INSTR_PTR( VG_(threads)[tid].arch ) = ip;
69 void VG_(set_SP) ( ThreadId tid, Addr sp ) {
70 STACK_PTR( VG_(threads)[tid].arch ) = sp;
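These accessors are how the core (and tools, through the exported machine API) read or move a thread's guest program counter, stack pointer and frame pointer. As a minimal illustrative sketch, not part of this file, a tool-side helper could dump the three values like so; it assumes the tool headers (pub_tool_machine.h, pub_tool_libcprint.h) are available, and the helper name is invented:

#include "pub_tool_basics.h"
#include "pub_tool_machine.h"
#include "pub_tool_libcprint.h"

/* Illustrative only: print the core register triple of one thread. */
static void print_thread_core_regs ( ThreadId tid )
{
   Addr ip = VG_(get_IP)(tid);   /* guest program counter */
   Addr sp = VG_(get_SP)(tid);   /* guest stack pointer   */
   Addr fp = VG_(get_FP)(tid);   /* guest frame pointer   */
   VG_(printf)("tid %u: IP=0x%lx SP=0x%lx FP=0x%lx\n", tid, ip, sp, fp);
}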
73 void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
74 ThreadId tid )
76 # if defined(VGA_x86)
77 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
78 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
79 regs->misc.X86.r_ebp
80 = VG_(threads)[tid].arch.vex.guest_EBP;
81 # elif defined(VGA_amd64)
82 regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
83 regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
84 regs->misc.AMD64.r_rbp
85 = VG_(threads)[tid].arch.vex.guest_RBP;
86 # elif defined(VGA_ppc32)
87 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
88 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
89 regs->misc.PPC32.r_lr
90 = VG_(threads)[tid].arch.vex.guest_LR;
91 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
92 regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
93 regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
94 regs->misc.PPC64.r_lr
95 = VG_(threads)[tid].arch.vex.guest_LR;
96 # elif defined(VGA_arm)
97 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
98 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
99 regs->misc.ARM.r14
100 = VG_(threads)[tid].arch.vex.guest_R14;
101 regs->misc.ARM.r12
102 = VG_(threads)[tid].arch.vex.guest_R12;
103 regs->misc.ARM.r11
104 = VG_(threads)[tid].arch.vex.guest_R11;
105 regs->misc.ARM.r7
106 = VG_(threads)[tid].arch.vex.guest_R7;
107 # elif defined(VGA_arm64)
108 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
109 regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
110 regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
111 regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
112 # elif defined(VGA_s390x)
113 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
114 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
115 regs->misc.S390X.r_fp
116 = VG_(threads)[tid].arch.vex.guest_FP;
117 regs->misc.S390X.r_lr
118 = VG_(threads)[tid].arch.vex.guest_LR;
119 /* ANDREAS 3 Apr 2019 FIXME r_f0..r_f7: is this correct? */
120 regs->misc.S390X.r_f0
121 = VG_(threads)[tid].arch.vex.guest_v0.w64[0];
122 regs->misc.S390X.r_f1
123 = VG_(threads)[tid].arch.vex.guest_v1.w64[0];
124 regs->misc.S390X.r_f2
125 = VG_(threads)[tid].arch.vex.guest_v2.w64[0];
126 regs->misc.S390X.r_f3
127 = VG_(threads)[tid].arch.vex.guest_v3.w64[0];
128 regs->misc.S390X.r_f4
129 = VG_(threads)[tid].arch.vex.guest_v4.w64[0];
130 regs->misc.S390X.r_f5
131 = VG_(threads)[tid].arch.vex.guest_v5.w64[0];
132 regs->misc.S390X.r_f6
133 = VG_(threads)[tid].arch.vex.guest_v6.w64[0];
134 regs->misc.S390X.r_f7
135 = VG_(threads)[tid].arch.vex.guest_v7.w64[0];
136 # elif defined(VGA_mips32)
137 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
138 regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
139 regs->misc.MIPS32.r30
140 = VG_(threads)[tid].arch.vex.guest_r30;
141 regs->misc.MIPS32.r31
142 = VG_(threads)[tid].arch.vex.guest_r31;
143 regs->misc.MIPS32.r28
144 = VG_(threads)[tid].arch.vex.guest_r28;
145 # elif defined(VGA_mips64)
146 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
147 regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
148 regs->misc.MIPS64.r30
149 = VG_(threads)[tid].arch.vex.guest_r30;
150 regs->misc.MIPS64.r31
151 = VG_(threads)[tid].arch.vex.guest_r31;
152 regs->misc.MIPS64.r28
153 = VG_(threads)[tid].arch.vex.guest_r28;
154 # else
155 # error "Unknown arch"
156 # endif
159 void
160 VG_(get_shadow_regs_area) ( ThreadId tid,
161 /*DST*/UChar* dst,
162 /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
164 void* src;
165 ThreadState* tst;
166 vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
167 vg_assert(VG_(is_valid_tid)(tid));
168 // Bounds check
169 vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
170 vg_assert(offset + size <= sizeof(VexGuestArchState));
171 // Copy
172 tst = & VG_(threads)[tid];
173 src = NULL;
174 switch (shadowNo) {
175 case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
176 case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
177 case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
179 vg_assert(src != NULL);
180 VG_(memcpy)( dst, src, size);
183 void
184 VG_(set_shadow_regs_area) ( ThreadId tid,
185 /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
186 /*SRC*/const UChar* src )
188 void* dst;
189 ThreadState* tst;
190 vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
191 vg_assert(VG_(is_valid_tid)(tid));
192 // Bounds check
193 vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
194 vg_assert(offset + size <= sizeof(VexGuestArchState));
195 // Copy
196 tst = & VG_(threads)[tid];
197 dst = NULL;
198 switch (shadowNo) {
199 case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
200 case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
201 case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
203 vg_assert(dst != NULL);
204 VG_(memcpy)( dst, src, size);
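The get/set pair above is the supported route into the shadow register areas that tools such as Memcheck maintain. A minimal sketch of round-tripping one register's shadow-1 value follows; it assumes the tool headers (pub_tool_machine.h, pub_tool_libcbase.h, pub_tool_libcassert.h), and the guest-state offset passed in stands for whatever the tool would normally use, for example offsetof(VexGuestAMD64State, guest_RAX):

#include "pub_tool_basics.h"
#include "pub_tool_machine.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"

/* Sketch: read a guest register's shadow-1 bytes, zero them, write back.
   shadowNo 1 selects vex_shadow1, exactly as in the switches above. */
static void zero_shadow1_of_reg ( ThreadId tid,
                                  PtrdiffT guest_reg_offset,
                                  SizeT reg_szB )
{
   UChar buf[16];
   tl_assert(reg_szB <= sizeof buf);
   VG_(get_shadow_regs_area)( tid, buf, 1/*shadowNo*/,
                              guest_reg_offset, reg_szB );
   VG_(memset)( buf, 0, reg_szB );
   VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,
                              guest_reg_offset, reg_szB, buf );
}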
208 static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
209 const HChar*, Addr))
211 VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
212 VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
213 #if defined(VGA_x86)
214 (*f)(tid, "EAX", vex->guest_EAX);
215 (*f)(tid, "ECX", vex->guest_ECX);
216 (*f)(tid, "EDX", vex->guest_EDX);
217 (*f)(tid, "EBX", vex->guest_EBX);
218 (*f)(tid, "ESI", vex->guest_ESI);
219 (*f)(tid, "EDI", vex->guest_EDI);
220 (*f)(tid, "ESP", vex->guest_ESP);
221 (*f)(tid, "EBP", vex->guest_EBP);
222 #elif defined(VGA_amd64)
223 (*f)(tid, "RAX", vex->guest_RAX);
224 (*f)(tid, "RCX", vex->guest_RCX);
225 (*f)(tid, "RDX", vex->guest_RDX);
226 (*f)(tid, "RBX", vex->guest_RBX);
227 (*f)(tid, "RSI", vex->guest_RSI);
228 (*f)(tid, "RDI", vex->guest_RDI);
229 (*f)(tid, "RSP", vex->guest_RSP);
230 (*f)(tid, "RBP", vex->guest_RBP);
231 (*f)(tid, "R8" , vex->guest_R8 );
232 (*f)(tid, "R9" , vex->guest_R9 );
233 (*f)(tid, "R10", vex->guest_R10);
234 (*f)(tid, "R11", vex->guest_R11);
235 (*f)(tid, "R12", vex->guest_R12);
236 (*f)(tid, "R13", vex->guest_R13);
237 (*f)(tid, "R14", vex->guest_R14);
238 (*f)(tid, "R15", vex->guest_R15);
239 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
240 (*f)(tid, "GPR0" , vex->guest_GPR0 );
241 (*f)(tid, "GPR1" , vex->guest_GPR1 );
242 (*f)(tid, "GPR2" , vex->guest_GPR2 );
243 (*f)(tid, "GPR3" , vex->guest_GPR3 );
244 (*f)(tid, "GPR4" , vex->guest_GPR4 );
245 (*f)(tid, "GPR5" , vex->guest_GPR5 );
246 (*f)(tid, "GPR6" , vex->guest_GPR6 );
247 (*f)(tid, "GPR7" , vex->guest_GPR7 );
248 (*f)(tid, "GPR8" , vex->guest_GPR8 );
249 (*f)(tid, "GPR9" , vex->guest_GPR9 );
250 (*f)(tid, "GPR10", vex->guest_GPR10);
251 (*f)(tid, "GPR11", vex->guest_GPR11);
252 (*f)(tid, "GPR12", vex->guest_GPR12);
253 (*f)(tid, "GPR13", vex->guest_GPR13);
254 (*f)(tid, "GPR14", vex->guest_GPR14);
255 (*f)(tid, "GPR15", vex->guest_GPR15);
256 (*f)(tid, "GPR16", vex->guest_GPR16);
257 (*f)(tid, "GPR17", vex->guest_GPR17);
258 (*f)(tid, "GPR18", vex->guest_GPR18);
259 (*f)(tid, "GPR19", vex->guest_GPR19);
260 (*f)(tid, "GPR20", vex->guest_GPR20);
261 (*f)(tid, "GPR21", vex->guest_GPR21);
262 (*f)(tid, "GPR22", vex->guest_GPR22);
263 (*f)(tid, "GPR23", vex->guest_GPR23);
264 (*f)(tid, "GPR24", vex->guest_GPR24);
265 (*f)(tid, "GPR25", vex->guest_GPR25);
266 (*f)(tid, "GPR26", vex->guest_GPR26);
267 (*f)(tid, "GPR27", vex->guest_GPR27);
268 (*f)(tid, "GPR28", vex->guest_GPR28);
269 (*f)(tid, "GPR29", vex->guest_GPR29);
270 (*f)(tid, "GPR30", vex->guest_GPR30);
271 (*f)(tid, "GPR31", vex->guest_GPR31);
272 (*f)(tid, "CTR" , vex->guest_CTR );
273 (*f)(tid, "LR" , vex->guest_LR );
274 #elif defined(VGA_arm)
275 (*f)(tid, "R0" , vex->guest_R0 );
276 (*f)(tid, "R1" , vex->guest_R1 );
277 (*f)(tid, "R2" , vex->guest_R2 );
278 (*f)(tid, "R3" , vex->guest_R3 );
279 (*f)(tid, "R4" , vex->guest_R4 );
280 (*f)(tid, "R5" , vex->guest_R5 );
281 (*f)(tid, "R6" , vex->guest_R6 );
282 (*f)(tid, "R8" , vex->guest_R8 );
283 (*f)(tid, "R9" , vex->guest_R9 );
284 (*f)(tid, "R10", vex->guest_R10);
285 (*f)(tid, "R11", vex->guest_R11);
286 (*f)(tid, "R12", vex->guest_R12);
287 (*f)(tid, "R13", vex->guest_R13);
288 (*f)(tid, "R14", vex->guest_R14);
289 #elif defined(VGA_s390x)
290 (*f)(tid, "r0" , vex->guest_r0 );
291 (*f)(tid, "r1" , vex->guest_r1 );
292 (*f)(tid, "r2" , vex->guest_r2 );
293 (*f)(tid, "r3" , vex->guest_r3 );
294 (*f)(tid, "r4" , vex->guest_r4 );
295 (*f)(tid, "r5" , vex->guest_r5 );
296 (*f)(tid, "r6" , vex->guest_r6 );
297 (*f)(tid, "r7" , vex->guest_r7 );
298 (*f)(tid, "r8" , vex->guest_r8 );
299 (*f)(tid, "r9" , vex->guest_r9 );
300 (*f)(tid, "r10", vex->guest_r10);
301 (*f)(tid, "r11", vex->guest_r11);
302 (*f)(tid, "r12", vex->guest_r12);
303 (*f)(tid, "r13", vex->guest_r13);
304 (*f)(tid, "r14", vex->guest_r14);
305 (*f)(tid, "r15", vex->guest_r15);
306 #elif defined(VGA_mips32) || defined(VGA_mips64)
307 (*f)(tid, "r0" , vex->guest_r0 );
308 (*f)(tid, "r1" , vex->guest_r1 );
309 (*f)(tid, "r2" , vex->guest_r2 );
310 (*f)(tid, "r3" , vex->guest_r3 );
311 (*f)(tid, "r4" , vex->guest_r4 );
312 (*f)(tid, "r5" , vex->guest_r5 );
313 (*f)(tid, "r6" , vex->guest_r6 );
314 (*f)(tid, "r7" , vex->guest_r7 );
315 (*f)(tid, "r8" , vex->guest_r8 );
316 (*f)(tid, "r9" , vex->guest_r9 );
317 (*f)(tid, "r10", vex->guest_r10);
318 (*f)(tid, "r11", vex->guest_r11);
319 (*f)(tid, "r12", vex->guest_r12);
320 (*f)(tid, "r13", vex->guest_r13);
321 (*f)(tid, "r14", vex->guest_r14);
322 (*f)(tid, "r15", vex->guest_r15);
323 (*f)(tid, "r16", vex->guest_r16);
324 (*f)(tid, "r17", vex->guest_r17);
325 (*f)(tid, "r18", vex->guest_r18);
326 (*f)(tid, "r19", vex->guest_r19);
327 (*f)(tid, "r20", vex->guest_r20);
328 (*f)(tid, "r21", vex->guest_r21);
329 (*f)(tid, "r22", vex->guest_r22);
330 (*f)(tid, "r23", vex->guest_r23);
331 (*f)(tid, "r24", vex->guest_r24);
332 (*f)(tid, "r25", vex->guest_r25);
333 (*f)(tid, "r26", vex->guest_r26);
334 (*f)(tid, "r27", vex->guest_r27);
335 (*f)(tid, "r28", vex->guest_r28);
336 (*f)(tid, "r29", vex->guest_r29);
337 (*f)(tid, "r30", vex->guest_r30);
338 (*f)(tid, "r31", vex->guest_r31);
339 #elif defined(VGA_arm64)
340 (*f)(tid, "x0" , vex->guest_X0 );
341 (*f)(tid, "x1" , vex->guest_X1 );
342 (*f)(tid, "x2" , vex->guest_X2 );
343 (*f)(tid, "x3" , vex->guest_X3 );
344 (*f)(tid, "x4" , vex->guest_X4 );
345 (*f)(tid, "x5" , vex->guest_X5 );
346 (*f)(tid, "x6" , vex->guest_X6 );
347 (*f)(tid, "x7" , vex->guest_X7 );
348 (*f)(tid, "x8" , vex->guest_X8 );
349 (*f)(tid, "x9" , vex->guest_X9 );
350 (*f)(tid, "x10", vex->guest_X10);
351 (*f)(tid, "x11", vex->guest_X11);
352 (*f)(tid, "x12", vex->guest_X12);
353 (*f)(tid, "x13", vex->guest_X13);
354 (*f)(tid, "x14", vex->guest_X14);
355 (*f)(tid, "x15", vex->guest_X15);
356 (*f)(tid, "x16", vex->guest_X16);
357 (*f)(tid, "x17", vex->guest_X17);
358 (*f)(tid, "x18", vex->guest_X18);
359 (*f)(tid, "x19", vex->guest_X19);
360 (*f)(tid, "x20", vex->guest_X20);
361 (*f)(tid, "x21", vex->guest_X21);
362 (*f)(tid, "x22", vex->guest_X22);
363 (*f)(tid, "x23", vex->guest_X23);
364 (*f)(tid, "x24", vex->guest_X24);
365 (*f)(tid, "x25", vex->guest_X25);
366 (*f)(tid, "x26", vex->guest_X26);
367 (*f)(tid, "x27", vex->guest_X27);
368 (*f)(tid, "x28", vex->guest_X28);
369 (*f)(tid, "x29", vex->guest_X29);
370 (*f)(tid, "x30", vex->guest_X30);
371 #else
372 # error Unknown arch
373 #endif
377 void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
379 ThreadId tid;
381 for (tid = 1; tid < VG_N_THREADS; tid++) {
382 if (VG_(is_valid_tid)(tid)
383 || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
384 // live thread or thread instructed to die by another thread that
385 // called exit.
386 apply_to_GPs_of_tid(tid, f);
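VG_(apply_to_GP_regs) hands each general-purpose register of every live (or exiting) thread to a callback; this is, for instance, how a leak checker can scan registers for pointers. A minimal usage sketch, with an invented callback name and assuming the tool headers:

#include "pub_tool_basics.h"
#include "pub_tool_machine.h"
#include "pub_tool_libcprint.h"

/* Sketch: print every GP register of every live thread.  The callback
   signature matches VG_(apply_to_GP_regs): tid, register name, value. */
static void show_gp_reg ( ThreadId tid, const HChar* regname, UWord value )
{
   VG_(printf)("tid %u: %s = 0x%lx\n", tid, regname, value);
}

static void show_all_gp_regs ( void )
{
   VG_(apply_to_GP_regs)( show_gp_reg );
}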
391 void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
393 *tid = (ThreadId)(-1);
396 Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
397 /*OUT*/Addr* stack_min,
398 /*OUT*/Addr* stack_max)
400 ThreadId i;
401 for (i = (*tid)+1; i < VG_N_THREADS; i++) {
402 if (i == VG_INVALID_THREADID)
403 continue;
404 if (VG_(threads)[i].status != VgTs_Empty) {
405 *tid = i;
406 *stack_min = VG_(get_SP)(i);
407 *stack_max = VG_(threads)[i].client_stack_highest_byte;
408 return True;
411 return False;
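The reset/next pair above is a plain iterator over the stack extents of live threads. Callers typically drive it as in this short sketch (illustrative, assuming the tool headers):

#include "pub_tool_basics.h"
#include "pub_tool_machine.h"
#include "pub_tool_libcprint.h"

/* Sketch: walk every live thread's stack range [stack_min, stack_max]. */
static void show_all_stack_ranges ( void )
{
   ThreadId tid;
   Addr     stack_min, stack_max;
   VG_(thread_stack_reset_iter)( &tid );
   while ( VG_(thread_stack_next)( &tid, &stack_min, &stack_max ) ) {
      VG_(printf)("tid %u: stack 0x%lx .. 0x%lx\n",
                  tid, stack_min, stack_max);
   }
}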
414 Addr VG_(thread_get_stack_max)(ThreadId tid)
416 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
417 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
418 return VG_(threads)[tid].client_stack_highest_byte;
421 SizeT VG_(thread_get_stack_size)(ThreadId tid)
423 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
424 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
425 return VG_(threads)[tid].client_stack_szB;
428 Addr VG_(thread_get_altstack_min)(ThreadId tid)
430 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
431 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
432 return (Addr)VG_(threads)[tid].altstack.ss_sp;
435 SizeT VG_(thread_get_altstack_size)(ThreadId tid)
437 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
438 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
439 return VG_(threads)[tid].altstack.ss_size;
442 //-------------------------------------------------------------
443 /* Details about the capabilities of the underlying (host) CPU. These
444 details are acquired by (1) enquiring with the CPU at startup, or
445 (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
446 line size). It's a bit nasty in the sense that there's no obvious
447 way to stop uses of some of this info before it's ready to go.
448 See pub_core_machine.h for more information about that.
450 VG_(machine_get_hwcaps) may use signals (although it attempts to
451 leave signal state unchanged) and therefore should only be
452 called before m_main sets up the client's signal state.
455 /* --------- State --------- */
456 static Bool hwcaps_done = False;
458 /* --- all archs --- */
459 static VexArch va = VexArch_INVALID;
460 static VexArchInfo vai;
462 #if defined(VGA_x86)
463 UInt VG_(machine_x86_have_mxcsr) = 0;
464 #endif
465 #if defined(VGA_ppc32)
466 UInt VG_(machine_ppc32_has_FP) = 0;
467 UInt VG_(machine_ppc32_has_VMX) = 0;
468 #endif
469 #if defined(VGA_ppc64be) || defined(VGA_ppc64le)
470 ULong VG_(machine_ppc64_has_VMX) = 0;
471 #endif
472 #if defined(VGA_arm)
473 Int VG_(machine_arm_archlevel) = 4;
474 #endif
477 /* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
478 testing, so we need a VG_MINIMAL_JMP_BUF. */
479 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
480 || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32) || defined(VGA_mips64)
481 #include "pub_core_libcsetjmp.h"
482 static VG_MINIMAL_JMP_BUF(env_unsup_insn);
483 static void handler_unsup_insn ( Int x ) {
484 VG_MINIMAL_LONGJMP(env_unsup_insn);
486 #endif
489 /* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
490 * handlers are installed. Determines the sizes affected by dcbz
491 * and dcbzl instructions and updates the given VexArchInfo structure
492 * accordingly.
494 * Not very defensive: assumes that as long as the dcbz/dcbzl
495  * instructions don't raise a SIGILL, they will zero an aligned,
496 * contiguous block of memory of a sensible size. */
497 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
498 static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
500 Int dcbz_szB = 0;
501 Int dcbzl_szB;
502 # define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
503 char test_block[4*MAX_DCBZL_SZB];
504 char *aligned = test_block;
505 Int i;
507    /* round up to next max block size, assumes MAX_DCBZL_SZB is a power of 2 */
508 aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
509 vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);
511 /* dcbz often clears 32B, although sometimes whatever the native cache
512 * block size is */
513 VG_(memset)(test_block, 0xff, sizeof(test_block));
514 __asm__ __volatile__("dcbz 0,%0"
515 : /*out*/
516 : "r" (aligned) /*in*/
517 : "memory" /*clobber*/);
518 for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
519 if (!test_block[i])
520 ++dcbz_szB;
522 vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);
524 /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
525 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
526 dcbzl_szB = 0; /* indicates unsupported */
528 else {
529 VG_(memset)(test_block, 0xff, sizeof(test_block));
530 /* some older assemblers won't understand the dcbzl instruction
531 * variant, so we directly emit the instruction ourselves */
532 __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
533 : /*out*/
534 : "r" (aligned) /*in*/
535 : "memory", "r9" /*clobber*/);
536 for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
537 if (!test_block[i])
538 ++dcbzl_szB;
540 vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
543 arch_info->ppc_dcbz_szB = dcbz_szB;
544 arch_info->ppc_dcbzl_szB = dcbzl_szB;
546 VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
547 dcbz_szB, dcbzl_szB);
548 # undef MAX_DCBZL_SZB
550 #endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */
552 #ifdef VGA_s390x
554 /* Read /proc/cpuinfo. Look for lines like these
556 processor 0: version = FF, identification = 0117C9, machine = 2064
558 and return the machine model. If the machine model could not be determined
559 or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */
561 static UInt VG_(get_machine_model)(void)
563 static struct model_map {
564 const HChar name[5];
565 UInt id;
566 } model_map[] = {
567 { "2064", VEX_S390X_MODEL_Z900 },
568 { "2066", VEX_S390X_MODEL_Z800 },
569 { "2084", VEX_S390X_MODEL_Z990 },
570 { "2086", VEX_S390X_MODEL_Z890 },
571 { "2094", VEX_S390X_MODEL_Z9_EC },
572 { "2096", VEX_S390X_MODEL_Z9_BC },
573 { "2097", VEX_S390X_MODEL_Z10_EC },
574 { "2098", VEX_S390X_MODEL_Z10_BC },
575 { "2817", VEX_S390X_MODEL_Z196 },
576 { "2818", VEX_S390X_MODEL_Z114 },
577 { "2827", VEX_S390X_MODEL_ZEC12 },
578 { "2828", VEX_S390X_MODEL_ZBC12 },
579 { "2964", VEX_S390X_MODEL_Z13 },
580 { "2965", VEX_S390X_MODEL_Z13S },
583 Int model, n, fh;
584 SysRes fd;
585 SizeT num_bytes, file_buf_size;
586 HChar *p, *m, *model_name, *file_buf;
588 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
589 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
590 if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;
592 fh = sr_Res(fd);
594 /* Determine the size of /proc/cpuinfo.
595 Work around broken-ness in /proc file system implementation.
596 fstat returns a zero size for /proc/cpuinfo although it is
597 claimed to be a regular file. */
598 num_bytes = 0;
599 file_buf_size = 1000;
600 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
601 while (42) {
602 n = VG_(read)(fh, file_buf, file_buf_size);
603 if (n < 0) break;
605 num_bytes += n;
606 if (n < file_buf_size) break; /* reached EOF */
609 if (n < 0) num_bytes = 0; /* read error; ignore contents */
611 if (num_bytes > file_buf_size) {
612 VG_(free)( file_buf );
613 VG_(lseek)( fh, 0, VKI_SEEK_SET );
614 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
615 n = VG_(read)( fh, file_buf, num_bytes );
616 if (n < 0) num_bytes = 0;
619 file_buf[num_bytes] = '\0';
620 VG_(close)(fh);
622 /* Parse file */
623 model = VEX_S390X_MODEL_UNKNOWN;
624 for (p = file_buf; *p; ++p) {
625 /* Beginning of line */
626 if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;
628 m = VG_(strstr)( p, "machine" );
629 if (m == NULL) continue;
631 p = m + sizeof "machine" - 1;
632 while ( VG_(isspace)( *p ) || *p == '=') {
633 if (*p == '\n') goto next_line;
634 ++p;
637 model_name = p;
638 for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
639 struct model_map *mm = model_map + n;
640 SizeT len = VG_(strlen)( mm->name );
641 if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
642 VG_(isspace)( model_name[len] )) {
643 if (mm->id < model) model = mm->id;
644 p = model_name + len;
645 break;
648 /* Skip until end-of-line */
649 while (*p != '\n')
650 ++p;
651 next_line: ;
654 VG_(free)( file_buf );
655 VG_(debugLog)(1, "machine", "model = %s\n",
656 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
657 : model_map[model].name);
658 return model;
661 #endif /* defined(VGA_s390x) */
663 #if defined(VGA_mips32) || defined(VGA_mips64)
666  * Initialize hwcaps by parsing /proc/cpuinfo. Returns False if it cannot
667  * determine what CPU it is (it searches only for the models that are or may be
668  * supported by Valgrind).
670 static Bool VG_(parse_cpuinfo)(void)
672 const char *search_Broadcom_str = "cpu model\t\t: Broadcom";
673 const char *search_Cavium_str= "cpu model\t\t: Cavium";
674 const char *search_Ingenic_str= "cpu model\t\t: Ingenic";
675 const char *search_Loongson_str= "cpu model\t\t: ICT Loongson";
676 const char *search_MIPS_str = "cpu model\t\t: MIPS";
677 const char *search_Netlogic_str = "cpu model\t\t: Netlogic";
679 Int n, fh;
680 SysRes fd;
681 SizeT num_bytes, file_buf_size;
682 HChar *file_buf, *isa;
684 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
685 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
686 if ( sr_isError(fd) ) return False;
688 fh = sr_Res(fd);
690 /* Determine the size of /proc/cpuinfo.
691 Work around broken-ness in /proc file system implementation.
692 fstat returns a zero size for /proc/cpuinfo although it is
693 claimed to be a regular file. */
694 num_bytes = 0;
695 file_buf_size = 1000;
696 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
697 while (42) {
698 n = VG_(read)(fh, file_buf, file_buf_size);
699 if (n < 0) break;
701 num_bytes += n;
702 if (n < file_buf_size) break; /* reached EOF */
705 if (n < 0) num_bytes = 0; /* read error; ignore contents */
707 if (num_bytes > file_buf_size) {
708 VG_(free)( file_buf );
709 VG_(lseek)( fh, 0, VKI_SEEK_SET );
710 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
711 n = VG_(read)( fh, file_buf, num_bytes );
712 if (n < 0) num_bytes = 0;
715 file_buf[num_bytes] = '\0';
716 VG_(close)(fh);
718 /* Parse file */
719 if (VG_(strstr)(file_buf, search_Broadcom_str) != NULL)
720 vai.hwcaps = VEX_PRID_COMP_BROADCOM;
721 else if (VG_(strstr)(file_buf, search_Netlogic_str) != NULL)
722 vai.hwcaps = VEX_PRID_COMP_NETLOGIC;
723 else if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
724 vai.hwcaps = VEX_PRID_COMP_CAVIUM;
725 else if (VG_(strstr)(file_buf, search_MIPS_str) != NULL)
726 vai.hwcaps = VEX_PRID_COMP_MIPS;
727 else if (VG_(strstr)(file_buf, search_Ingenic_str) != NULL)
728 vai.hwcaps = VEX_PRID_COMP_INGENIC_E1;
729 else if (VG_(strstr)(file_buf, search_Loongson_str) != NULL)
730 vai.hwcaps = (VEX_PRID_COMP_LEGACY | VEX_PRID_IMP_LOONGSON_64);
731 else {
732 /* Did not find string in the proc file. */
733 vai.hwcaps = 0;
734 VG_(free)(file_buf);
735 return False;
738 isa = VG_(strstr)(file_buf, "isa\t\t\t: ");
740 if (NULL != isa) {
741 if (VG_(strstr) (isa, "mips32r1") != NULL)
742 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
743 if (VG_(strstr) (isa, "mips32r2") != NULL)
744 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
745 if (VG_(strstr) (isa, "mips32r6") != NULL)
746 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R6;
747 if (VG_(strstr) (isa, "mips64r1") != NULL)
748 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R1;
749 if (VG_(strstr) (isa, "mips64r2") != NULL)
750 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2;
751 if (VG_(strstr) (isa, "mips64r6") != NULL)
752 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R6;
755 * TODO(petarj): Remove this Cavium workaround once Linux kernel folks
756 * decide to change incorrect settings in
757 * mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h.
758 * The current settings show mips32r1, mips32r2 and mips64r1 as
759 * unsupported ISAs by Cavium MIPS CPUs.
761 if (VEX_MIPS_COMP_ID(vai.hwcaps) == VEX_PRID_COMP_CAVIUM) {
762 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1 | VEX_MIPS_CPU_ISA_M32R2 |
763 VEX_MIPS_CPU_ISA_M64R1;
765 } else {
767 * Kernel does not provide information about supported ISAs.
768 * Populate the isa level flags based on the CPU model. That is our
769 * best guess.
771       switch (VEX_MIPS_COMP_ID(vai.hwcaps)) {
772 case VEX_PRID_COMP_CAVIUM:
773 case VEX_PRID_COMP_NETLOGIC:
774 vai.hwcaps |= (VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1);
775 /* fallthrough */
776 case VEX_PRID_COMP_INGENIC_E1:
777 case VEX_PRID_COMP_MIPS:
778 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
779 /* fallthrough */
780 case VEX_PRID_COMP_BROADCOM:
781 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
782 break;
783 case VEX_PRID_COMP_LEGACY:
784 if ((VEX_MIPS_PROC_ID(vai.hwcaps) == VEX_PRID_IMP_LOONGSON_64))
785 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1 |
786 VEX_MIPS_CPU_ISA_M32R2 | VEX_MIPS_CPU_ISA_M32R1;
787 break;
788 default:
789 break;
792 VG_(free)(file_buf);
793 return True;
796 #endif /* defined(VGA_mips32) || defined(VGA_mips64) */
798 #if defined(VGP_arm64_linux)
800 /* Check to see whether we are running on a Cavium core, and if so auto-enable
801 the fallback LLSC implementation. See #369459. */
803 static Bool VG_(parse_cpuinfo)(void)
805 const char *search_Cavium_str = "CPU implementer\t: 0x43";
807 Int n, fh;
808 SysRes fd;
809 SizeT num_bytes, file_buf_size;
810 HChar *file_buf;
812 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
813 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
814 if ( sr_isError(fd) ) return False;
816 fh = sr_Res(fd);
818 /* Determine the size of /proc/cpuinfo.
819 Work around broken-ness in /proc file system implementation.
820 fstat returns a zero size for /proc/cpuinfo although it is
821 claimed to be a regular file. */
822 num_bytes = 0;
823 file_buf_size = 1000;
824 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
825 while (42) {
826 n = VG_(read)(fh, file_buf, file_buf_size);
827 if (n < 0) break;
829 num_bytes += n;
830 if (n < file_buf_size) break; /* reached EOF */
833 if (n < 0) num_bytes = 0; /* read error; ignore contents */
835 if (num_bytes > file_buf_size) {
836 VG_(free)( file_buf );
837 VG_(lseek)( fh, 0, VKI_SEEK_SET );
838 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
839 n = VG_(read)( fh, file_buf, num_bytes );
840 if (n < 0) num_bytes = 0;
843 file_buf[num_bytes] = '\0';
844 VG_(close)(fh);
846 /* Parse file */
847 if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
848 vai.arm64_requires_fallback_LLSC = True;
850 VG_(free)(file_buf);
851 return True;
854 #endif /* defined(VGP_arm64_linux) */
856 Bool VG_(machine_get_hwcaps)( void )
858 vg_assert(hwcaps_done == False);
859 hwcaps_done = True;
861 // Whack default settings into vai, so that we only need to fill in
862 // any interesting bits.
863 LibVEX_default_VexArchInfo(&vai);
865 #if defined(VGA_x86)
866 { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
867 UInt eax, ebx, ecx, edx, max_extended;
868 HChar vstr[13];
869 vstr[0] = 0;
871 if (!VG_(has_cpuid)())
872 /* we can't do cpuid at all. Give up. */
873 return False;
875 VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
876 if (eax < 1)
877 /* we can't ask for cpuid(x) for x > 0. Give up. */
878 return False;
880 /* Get processor ID string, and max basic/extended index
881 values. */
882 VG_(memcpy)(&vstr[0], &ebx, 4);
883 VG_(memcpy)(&vstr[4], &edx, 4);
884 VG_(memcpy)(&vstr[8], &ecx, 4);
885 vstr[12] = 0;
887 VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
888 max_extended = eax;
890 /* get capabilities bits into edx */
891 VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
893 have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
894 have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
895 have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */
897 /* cmpxchg8b is a minimum requirement now; if we don't have it we
898 must simply give up. But all CPUs since Pentium-I have it, so
899 that doesn't seem like much of a restriction. */
900 have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
901 if (!have_cx8)
902 return False;
904 /* Figure out if this is an AMD that can do MMXEXT. */
905 have_mmxext = False;
906 if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
907 && max_extended >= 0x80000001) {
908 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
909 /* Some older AMD processors support a sse1 subset (Integer SSE). */
910 have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
913 /* Figure out if this is an AMD or Intel that can do LZCNT. */
914 have_lzcnt = False;
915 if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
916 || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
917 && max_extended >= 0x80000001) {
918 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
919 have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
922 /* Intel processors don't define the mmxext extension, but since it
923         is just an sse1 subset, always define it when we have sse1. */
924 if (have_sse1)
925 have_mmxext = True;
927 va = VexArchX86;
928 vai.endness = VexEndnessLE;
930 if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
931 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
932 vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
933 vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
934 vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
935 if (have_lzcnt)
936 vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
937 VG_(machine_x86_have_mxcsr) = 1;
938 } else if (have_sse2 && have_sse1 && have_mmxext) {
939 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
940 vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
941 vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
942 if (have_lzcnt)
943 vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
944 VG_(machine_x86_have_mxcsr) = 1;
945 } else if (have_sse1 && have_mmxext) {
946 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
947 vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
948 VG_(machine_x86_have_mxcsr) = 1;
949 } else if (have_mmxext) {
950 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
951 VG_(machine_x86_have_mxcsr) = 0;
952 } else {
953 vai.hwcaps = 0; /*baseline - no sse at all*/
954 VG_(machine_x86_have_mxcsr) = 0;
957 VG_(machine_get_cache_info)(&vai);
959 return True;
962 #elif defined(VGA_amd64)
963 { Bool have_sse3, have_ssse3, have_cx8, have_cx16;
964 Bool have_lzcnt, have_avx, have_bmi, have_avx2;
965 Bool have_rdtscp, have_rdrand, have_f16c;
966 UInt eax, ebx, ecx, edx, max_basic, max_extended;
967 ULong xgetbv_0 = 0;
968 HChar vstr[13];
969 vstr[0] = 0;
971 have_sse3 = have_ssse3 = have_cx8 = have_cx16
972 = have_lzcnt = have_avx = have_bmi = have_avx2
973 = have_rdtscp = have_rdrand = have_f16c = False;
975 eax = ebx = ecx = edx = max_basic = max_extended = 0;
977 if (!VG_(has_cpuid)())
978 /* we can't do cpuid at all. Give up. */
979 return False;
981 VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
982 max_basic = eax;
983 if (max_basic < 1)
984 /* we can't ask for cpuid(x) for x > 0. Give up. */
985 return False;
987 /* Get processor ID string, and max basic/extended index
988 values. */
989 VG_(memcpy)(&vstr[0], &ebx, 4);
990 VG_(memcpy)(&vstr[4], &edx, 4);
991 VG_(memcpy)(&vstr[8], &ecx, 4);
992 vstr[12] = 0;
994 VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
995 max_extended = eax;
997 /* get capabilities bits into edx */
998 VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
1000 // we assume that SSE1 and SSE2 are available by default
1001 have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */
1002 have_ssse3 = (ecx & (1<<9)) != 0; /* True => have Sup SSE3 insns */
1003 // fma is ecx:12
1004 // sse41 is ecx:19
1005 // sse42 is ecx:20
1006 // xsave is ecx:26
1007 // osxsave is ecx:27
1008 // avx is ecx:28
1009 have_f16c = (ecx & (1<<29)) != 0; /* True => have F16C insns */
1010 have_rdrand = (ecx & (1<<30)) != 0; /* True => have RDRAND insns */
1012 have_avx = False;
1013 /* have_fma = False; */
1014 if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
1015         /* Processor supports AVX and XSAVE, and the OS has enabled
1016            XGETBV (OSXSAVE), so AVX state may actually be usable. */
1017 ULong w;
1018 __asm__ __volatile__("movq $0,%%rcx ; "
1019 ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
1020 "movq %%rax,%0"
1021 :/*OUT*/"=r"(w) :/*IN*/
1022 :/*TRASH*/"rdx","rcx","rax");
1023 xgetbv_0 = w;
1024 if ((xgetbv_0 & 7) == 7) {
1025 /* Only say we have AVX if the XSAVE-allowable
1026 bitfield-mask allows x87, SSE and AVX state. We could
1027 actually run with a more restrictive XGETBV(0) value,
1028 but VEX's implementation of XSAVE and XRSTOR assumes
1029 that all 3 bits are enabled.
1031 Also, the VEX implementation of XSAVE/XRSTOR assumes that
1032 state component [2] (the YMM high halves) are located in
1033 the XSAVE image at offsets 576 .. 831. So we have to
1034 check that here before declaring AVX to be supported. */
1035 UInt eax2, ebx2, ecx2, edx2;
1036 VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2);
1037 if (ebx2 == 576 && eax2 == 256) {
1038 have_avx = True;
1040 /* have_fma = (ecx & (1<<12)) != 0; */
1041 /* have_fma: Probably correct, but gcc complains due to
1042 unusedness. */
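For readers who want to reproduce this AVX gate outside Valgrind: the same three-part check (CPUID.1:ECX bits 26..28, XGETBV(0)'s low three bits, and the CPUID.0xD subleaf-2 layout that VEX's XSAVE/XRSTOR code assumes) can be written as a small standalone program. The sketch below is illustrative only and assumes a GCC/Clang toolchain whose <cpuid.h> provides __get_cpuid and __get_cpuid_count:

#include <stdio.h>
#include <cpuid.h>   /* __get_cpuid, __get_cpuid_count (GCC/Clang) */

static unsigned long long xgetbv0(void)
{
   unsigned int lo, hi;
   /* xgetbv with ECX=0, emitted as raw bytes just as m_machine.c does */
   __asm__ __volatile__(".byte 0x0F,0x01,0xD0"
                        : "=a"(lo), "=d"(hi) : "c"(0));
   return ((unsigned long long)hi << 32) | lo;
}

int main(void)
{
   unsigned int a, b, c, d;
   if (!__get_cpuid(1, &a, &b, &c, &d)) return 1;
   /* AVX (28), OSXSAVE (27) and XSAVE (26) must all be set ... */
   int avx_osxsave = (c & ((1u<<28)|(1u<<27)|(1u<<26)))
                        == ((1u<<28)|(1u<<27)|(1u<<26));
   /* ... XCR0 must enable x87, SSE and AVX state ... */
   int xcr0_ok = avx_osxsave && (xgetbv0() & 7) == 7;
   /* ... and the YMM high halves must sit at offset 576, size 256. */
   int layout_ok = 0;
   if (xcr0_ok && __get_cpuid_count(0xD, 2, &a, &b, &c, &d))
      layout_ok = (b == 576 && a == 256);
   printf("AVX usable under VEX's assumptions: %s\n",
          (xcr0_ok && layout_ok) ? "yes" : "no");
   return 0;
}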
1046 /* cmpxchg8b is a minimum requirement now; if we don't have it we
1047 must simply give up. But all CPUs since Pentium-I have it, so
1048 that doesn't seem like much of a restriction. */
1049 have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
1050 if (!have_cx8)
1051 return False;
1053 /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
1054 have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */
1056 /* Figure out if this CPU can do LZCNT. */
1057 have_lzcnt = False;
1058 if (max_extended >= 0x80000001) {
1059 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
1060 have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
1063 /* Can we do RDTSCP? */
1064 have_rdtscp = False;
1065 if (max_extended >= 0x80000001) {
1066 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
1067         have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
1070      /* Check for BMI1 and AVX2, but only if we have AVX1 (plus OS support). */
1071 have_bmi = False;
1072 have_avx2 = False;
1073 if (have_avx && max_basic >= 7) {
1074 VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
1075 have_bmi = (ebx & (1<<3)) != 0; /* True => have BMI1 */
1076 have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
1079 /* Sanity check for RDRAND and F16C. These don't actually *need* AVX, but
1080 it's convenient to restrict them to the AVX case since the simulated
1081 CPUID we'll offer them on has AVX as a base. */
1082 if (!have_avx) {
1083 have_f16c = False;
1084 have_rdrand = False;
1087 va = VexArchAMD64;
1088 vai.endness = VexEndnessLE;
1089 vai.hwcaps = (have_sse3 ? VEX_HWCAPS_AMD64_SSE3 : 0)
1090 | (have_ssse3 ? VEX_HWCAPS_AMD64_SSSE3 : 0)
1091 | (have_cx16 ? VEX_HWCAPS_AMD64_CX16 : 0)
1092 | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0)
1093 | (have_avx ? VEX_HWCAPS_AMD64_AVX : 0)
1094 | (have_bmi ? VEX_HWCAPS_AMD64_BMI : 0)
1095 | (have_avx2 ? VEX_HWCAPS_AMD64_AVX2 : 0)
1096 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0)
1097 | (have_f16c ? VEX_HWCAPS_AMD64_F16C : 0)
1098 | (have_rdrand ? VEX_HWCAPS_AMD64_RDRAND : 0);
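Note how the guard a few lines up clears have_f16c and have_rdrand whenever AVX is unavailable, so the hwcaps word assembled here reports F16C and RDRAND whenever they are present and the host has at least AVX, which is the behaviour the commit title refers to. For reference, the two raw CPUID bits tested earlier (leaf 1, ECX bits 29 and 30) can be probed in ordinary user code with an illustrative sketch like this, again assuming a GCC/Clang <cpuid.h>:

#include <stdbool.h>
#include <cpuid.h>

/* Raw CPUID.1:ECX feature bits, matching the tests in m_machine.c. */
static bool cpu_has_f16c(void)
{
   unsigned int a, b, c, d;
   return __get_cpuid(1, &a, &b, &c, &d) && (c & (1u << 29)) != 0;
}

static bool cpu_has_rdrand(void)
{
   unsigned int a, b, c, d;
   return __get_cpuid(1, &a, &b, &c, &d) && (c & (1u << 30)) != 0;
}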
1100 VG_(machine_get_cache_info)(&vai);
1102 return True;
1105 #elif defined(VGA_ppc32)
1107 /* Find out which subset of the ppc32 instruction set is supported by
1108 verifying whether various ppc32 instructions generate a SIGILL
1109 or a SIGFPE. An alternative approach is to check the AT_HWCAP and
1110 AT_PLATFORM entries in the ELF auxiliary table -- see also
1111 the_iifii.client_auxv in m_main.c.
1113 vki_sigset_t saved_set, tmp_set;
1114 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1115 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
1117 volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
1118 volatile Bool have_isa_2_07, have_isa_3_0;
1119 Int r;
1121 /* This is a kludge. Really we ought to back-convert saved_act
1122 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1123 since that's a no-op on all ppc32 platforms so far supported,
1124 it's not worth the typing effort. At least include most basic
1125 sanity check: */
1126 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1128 VG_(sigemptyset)(&tmp_set);
1129 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1130 VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1132 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1133 vg_assert(r == 0);
1135 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1136 vg_assert(r == 0);
1137 tmp_sigill_act = saved_sigill_act;
1139 r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1140 vg_assert(r == 0);
1141 tmp_sigfpe_act = saved_sigfpe_act;
1143 /* NODEFER: signal handler does not return (from the kernel's point of
1144 view), hence if it is to successfully catch a signal more than once,
1145 we need the NODEFER flag. */
1146 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1147 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1148 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1149 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1150 r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1151 vg_assert(r == 0);
1153 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1154 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1155 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER;
1156 tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1157 r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1158 vg_assert(r == 0);
1160 /* standard FP insns */
1161 have_F = True;
1162 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1163 have_F = False;
1164 } else {
1165 __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
1168 /* Altivec insns */
1169 have_V = True;
1170 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1171 have_V = False;
1172 } else {
1173 /* Unfortunately some older assemblers don't speak Altivec (or
1174 choose not to), so to be safe we directly emit the 32-bit
1175 word corresponding to "vor 0,0,0". This fixes a build
1176 problem that happens on Debian 3.1 (ppc32), and probably
1177 various other places. */
1178 __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
1181 /* General-Purpose optional (fsqrt, fsqrts) */
1182 have_FX = True;
1183 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1184 have_FX = False;
1185 } else {
1186 __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
1189 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
1190 have_GX = True;
1191 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1192 have_GX = False;
1193 } else {
1194 __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
1197 /* VSX support implies Power ISA 2.06 */
1198 have_VX = True;
1199 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1200 have_VX = False;
1201 } else {
1202 __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
1205 /* Check for Decimal Floating Point (DFP) support. */
1206 have_DFP = True;
1207 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1208 have_DFP = False;
1209 } else {
1210 __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
1213 /* Check for ISA 2.07 support. */
1214 have_isa_2_07 = True;
1215 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1216 have_isa_2_07 = False;
1217 } else {
1218 __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
1221 /* Check for ISA 3.0 support. */
1222 have_isa_3_0 = True;
1223 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1224 have_isa_3_0 = False;
1225 } else {
1226 __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
1229 /* determine dcbz/dcbzl sizes while we still have the signal
1230 * handlers registered */
1231 find_ppc_dcbz_sz(&vai);
1233 r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1234 vg_assert(r == 0);
1235 r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
1236 vg_assert(r == 0);
1237 r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1238 vg_assert(r == 0);
1239 VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
1240 (Int)have_F, (Int)have_V, (Int)have_FX,
1241 (Int)have_GX, (Int)have_VX, (Int)have_DFP,
1242 (Int)have_isa_2_07, (Int)have_isa_3_0);
1243 /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
1244 if (have_V && !have_F)
1245 have_V = False;
1246 if (have_FX && !have_F)
1247 have_FX = False;
1248 if (have_GX && !have_F)
1249 have_GX = False;
1251 VG_(machine_ppc32_has_FP) = have_F ? 1 : 0;
1252 VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;
1254 va = VexArchPPC32;
1255 vai.endness = VexEndnessBE;
1257 vai.hwcaps = 0;
1258 if (have_F) vai.hwcaps |= VEX_HWCAPS_PPC32_F;
1259 if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC32_V;
1260 if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
1261 if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
1262 if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
1263 if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
1264 if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;
1265 if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA3_0;
1267 VG_(machine_get_cache_info)(&vai);
1269 /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
1270 called before we're ready to go. */
1271 return True;
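The probing pattern used throughout this ppc32 block (install a SIGILL/SIGFPE handler that longjmps, execute a candidate instruction, and treat arrival back at the setjmp as "not supported") is generic. Outside of Valgrind the same idea can be written with plain POSIX sigsetjmp/siglongjmp; the sketch below is illustrative, takes the probed instruction from the caller, and restores the previous handler but, unlike the code above, does not touch the signal mask:

#include <setjmp.h>
#include <signal.h>
#include <string.h>

static sigjmp_buf probe_env;

static void on_sigill(int sig)
{
   (void)sig;
   siglongjmp(probe_env, 1);
}

/* Returns 1 if try_insn() runs to completion, 0 if it raises SIGILL. */
static int probe_insn(void (*try_insn)(void))
{
   struct sigaction sa, saved;
   int supported;

   memset(&sa, 0, sizeof sa);
   sa.sa_handler = on_sigill;
   sa.sa_flags   = SA_NODEFER;      /* let the handler fire repeatedly */
   sigemptyset(&sa.sa_mask);
   sigaction(SIGILL, &sa, &saved);

   if (sigsetjmp(probe_env, 1) == 0) {
      try_insn();
      supported = 1;
   } else {
      supported = 0;
   }

   sigaction(SIGILL, &saved, NULL);
   return supported;
}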
1274 #elif defined(VGA_ppc64be)|| defined(VGA_ppc64le)
1276 /* Same instruction set detection algorithm as for ppc32. */
1277 vki_sigset_t saved_set, tmp_set;
1278 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1279 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
1281 volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
1282 volatile Bool have_isa_2_07, have_isa_3_0;
1283 Int r;
1285 /* This is a kludge. Really we ought to back-convert saved_act
1286 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1287 since that's a no-op on all ppc64 platforms so far supported,
1288 it's not worth the typing effort. At least include most basic
1289 sanity check: */
1290 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1292 VG_(sigemptyset)(&tmp_set);
1293 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1294 VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1296 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1297 vg_assert(r == 0);
1299 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1300 vg_assert(r == 0);
1301 tmp_sigill_act = saved_sigill_act;
1303 VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1304 tmp_sigfpe_act = saved_sigfpe_act;
1306 /* NODEFER: signal handler does not return (from the kernel's point of
1307 view), hence if it is to successfully catch a signal more than once,
1308 we need the NODEFER flag. */
1309 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1310 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1311 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1312 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1313 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1315 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1316 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1317 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER;
1318 tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1319 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1321 /* standard FP insns */
1322 have_F = True;
1323 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1324 have_F = False;
1325 } else {
1326 __asm__ __volatile__("fmr 0,0");
1329 /* Altivec insns */
1330 have_V = True;
1331 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1332 have_V = False;
1333 } else {
1334 __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
1337 /* General-Purpose optional (fsqrt, fsqrts) */
1338 have_FX = True;
1339 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1340 have_FX = False;
1341 } else {
1342 __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
1345 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
1346 have_GX = True;
1347 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1348 have_GX = False;
1349 } else {
1350 __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
1353 /* VSX support implies Power ISA 2.06 */
1354 have_VX = True;
1355 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1356 have_VX = False;
1357 } else {
1358 __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
1361 /* Check for Decimal Floating Point (DFP) support. */
1362 have_DFP = True;
1363 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1364 have_DFP = False;
1365 } else {
1366 __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
1369 /* Check for ISA 2.07 support. */
1370 have_isa_2_07 = True;
1371 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1372 have_isa_2_07 = False;
1373 } else {
1374 __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
1377 /* Check for ISA 3.0 support. */
1378 have_isa_3_0 = True;
1379 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1380 have_isa_3_0 = False;
1381 } else {
1382 __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
1385 /* determine dcbz/dcbzl sizes while we still have the signal
1386 * handlers registered */
1387 find_ppc_dcbz_sz(&vai);
1389 VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1390 VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
1391 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1392 VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
1393 (Int)have_F, (Int)have_V, (Int)have_FX,
1394 (Int)have_GX, (Int)have_VX, (Int)have_DFP,
1395                  (Int)have_isa_2_07, (Int)have_isa_3_0);
1396 /* on ppc64be, if we don't even have FP, just give up. */
1397 if (!have_F)
1398 return False;
1400 VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;
1402 va = VexArchPPC64;
1403 # if defined(VKI_LITTLE_ENDIAN)
1404 vai.endness = VexEndnessLE;
1405 # elif defined(VKI_BIG_ENDIAN)
1406 vai.endness = VexEndnessBE;
1407 # else
1408 vai.endness = VexEndness_INVALID;
1409 # endif
1411 vai.hwcaps = 0;
1412 if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC64_V;
1413 if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
1414 if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
1415 if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
1416 if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
1417 if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;
1418 if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_0;
1420 VG_(machine_get_cache_info)(&vai);
1422 /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
1423 called before we're ready to go. */
1424 return True;
1427 #elif defined(VGA_s390x)
1429 # include "libvex_s390x_common.h"
1432 /* Instruction set detection code borrowed from ppc above. */
1433 vki_sigset_t saved_set, tmp_set;
1434 vki_sigaction_fromK_t saved_sigill_act;
1435 vki_sigaction_toK_t tmp_sigill_act;
1437 volatile Bool have_LDISP, have_STFLE;
1438 Int i, r, model;
1440 /* If the model is "unknown" don't treat this as an error. Assume
1441 this is a brand-new machine model for which we don't have the
1442 identification yet. Keeping fingers crossed. */
1443 model = VG_(get_machine_model)();
1445 /* Unblock SIGILL and stash away the old action for that signal */
1446 VG_(sigemptyset)(&tmp_set);
1447 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1449 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1450 vg_assert(r == 0);
1452 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1453 vg_assert(r == 0);
1454 tmp_sigill_act = saved_sigill_act;
1456 /* NODEFER: signal handler does not return (from the kernel's point of
1457 view), hence if it is to successfully catch a signal more than once,
1458 we need the NODEFER flag. */
1459 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1460 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1461 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1462 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1463 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1465 /* Determine hwcaps. Note, we cannot use the stfle insn because it
1466 is not supported on z900. */
1468 have_LDISP = True;
1469 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1470 have_LDISP = False;
1471 } else {
1472 /* BASR loads the address of the next insn into r1. Needed to avoid
1473 a segfault in XY. */
1474 __asm__ __volatile__("basr %%r1,%%r0\n\t"
1475 ".long 0xe3001000\n\t" /* XY 0,0(%r1) */
1476 ".short 0x0057" : : : "r0", "r1", "cc", "memory");
1479 /* Check availability of STFLE. If available store facility bits
1480 in hoststfle. */
1481 ULong hoststfle[S390_NUM_FACILITY_DW];
1483 for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
1484 hoststfle[i] = 0;
1486 have_STFLE = True;
1487 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1488 have_STFLE = False;
1489 } else {
1490 register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;
1492 __asm__ __volatile__(" .insn s,0xb2b00000,%0\n" /* stfle */
1493 : "=m" (hoststfle), "+d"(reg0)
1494 : : "cc", "memory");
1497 /* Restore signals */
1498 r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1499 vg_assert(r == 0);
1500 r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1501 vg_assert(r == 0);
1502 va = VexArchS390X;
1503 vai.endness = VexEndnessBE;
1505 vai.hwcaps = model;
1506 if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
1507 if (have_LDISP) {
1508 /* Use long displacement only on machines >= z990. For all other
1509 machines it is millicoded and therefore slow. */
1510 if (model >= VEX_S390X_MODEL_Z990)
1511 vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
1514 /* Detect presence of certain facilities using the STFLE insn.
1515       Note that these facilities were introduced at the same time as, or
1516       later than, STFLE, so the absence of STFLE implies the absence of the
1517       facility we're trying to detect. */
1518 struct fac_hwcaps_map {
1519 UInt installed;
1520 UInt facility_bit;
1521 UInt hwcaps_bit;
1522 const HChar name[6]; // may need adjustment for new facility names
1523 } fac_hwcaps[] = {
1524 { False, S390_FAC_EIMM, VEX_HWCAPS_S390X_EIMM, "EIMM" },
1525 { False, S390_FAC_GIE, VEX_HWCAPS_S390X_GIE, "GIE" },
1526 { False, S390_FAC_DFP, VEX_HWCAPS_S390X_DFP, "DFP" },
1527 { False, S390_FAC_FPSE, VEX_HWCAPS_S390X_FGX, "FGX" },
1528 { False, S390_FAC_ETF2, VEX_HWCAPS_S390X_ETF2, "ETF2" },
1529 { False, S390_FAC_ETF3, VEX_HWCAPS_S390X_ETF3, "ETF3" },
1530 { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
1531 { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
1532 { False, S390_FAC_LSC, VEX_HWCAPS_S390X_LSC, "LSC" },
1533 { False, S390_FAC_PFPO, VEX_HWCAPS_S390X_PFPO, "PFPO" },
1534 { False, S390_FAC_VX, VEX_HWCAPS_S390X_VX, "VX" },
1535 { False, S390_FAC_MSA5, VEX_HWCAPS_S390X_MSA5, "MSA5" }
1538 /* Set hwcaps according to the detected facilities */
1539 UChar dw_number = 0;
1540 UChar fac_bit = 0;
1541 for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
1542 vg_assert(fac_hwcaps[i].facility_bit <= 191); // for now
1543 dw_number = fac_hwcaps[i].facility_bit / 64;
1544 fac_bit = fac_hwcaps[i].facility_bit % 64;
1545 if (hoststfle[dw_number] & (1ULL << (63 - fac_bit))) {
1546 fac_hwcaps[i].installed = True;
1547 vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
1551 /* Build up a string showing the probed-for facilities */
1552 HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
1553 (sizeof fac_hwcaps[0].name + 3) + // %s %d
1554 7 + 1 + 4 + 2 // machine %4d
1555 + 1]; // \0
1556 HChar *p = fac_str;
1557 p += VG_(sprintf)(p, "machine %4d ", model);
1558 for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
1559 p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name,
1560 fac_hwcaps[i].installed);
1562 *p++ = '\0';
1564 VG_(debugLog)(1, "machine", "%s\n", fac_str);
1565 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
1567 VG_(machine_get_cache_info)(&vai);
1569 return True;
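A note on the facility-bit arithmetic above: STFLE numbers facilities MSB-first within each 64-bit doubleword, which is why the test is 1ULL << (63 - fac_bit). As a worked example, the vector facility (S390_FAC_VX, architectural facility number 129) lands in doubleword 129/64 = 2 at bit 129%64 = 1, so it is installed iff hoststfle[2] & (1ULL << 62) is non-zero. A tiny helper capturing that rule might look like this sketch (name invented, using Valgrind's ULong/UInt types):

/* Sketch: test one STFLE facility bit, MSB-first within each doubleword,
   exactly as the loop above does. */
static inline int s390_facility_installed ( const ULong* facilities, UInt bit )
{
   return (facilities[bit / 64] & (1ULL << (63 - (bit % 64)))) != 0;
}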
1572 #elif defined(VGA_arm)
1574 /* Same instruction set detection algorithm as for ppc32. */
1575 vki_sigset_t saved_set, tmp_set;
1576 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1577 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
1579 volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON, have_V8;
1580 volatile Int archlevel;
1581 Int r;
1583 /* This is a kludge. Really we ought to back-convert saved_act
1584 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1585       since that's a no-op on all arm platforms so far supported,
1586 it's not worth the typing effort. At least include most basic
1587 sanity check: */
1588 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1590 VG_(sigemptyset)(&tmp_set);
1591 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1592 VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1594 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1595 vg_assert(r == 0);
1597 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1598 vg_assert(r == 0);
1599 tmp_sigill_act = saved_sigill_act;
1601 VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1602 tmp_sigfpe_act = saved_sigfpe_act;
1604 /* NODEFER: signal handler does not return (from the kernel's point of
1605 view), hence if it is to successfully catch a signal more than once,
1606 we need the NODEFER flag. */
1607 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1608 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1609 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1610 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1611 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1613 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1614 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1615 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER;
1616 tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1617 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1619 /* VFP insns */
1620 have_VFP = True;
1621 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1622 have_VFP = False;
1623 } else {
1624 __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
1626    /* There are several generations of the VFP extension, but they differ
1627       very little, so for now we will not distinguish them. */
1628 have_VFP2 = have_VFP;
1629 have_VFP3 = have_VFP;
1631 /* NEON insns */
1632 have_NEON = True;
1633 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1634 have_NEON = False;
1635 } else {
1636 __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
1639 /* ARM architecture level */
1640 archlevel = 5; /* v5 will be base level */
1641 if (archlevel < 7) {
1642 archlevel = 7;
1643 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1644 archlevel = 5;
1645 } else {
1646 __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
1649 if (archlevel < 6) {
1650 archlevel = 6;
1651 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1652 archlevel = 5;
1653 } else {
1654 __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
1658 /* ARMv8 insns */
1659 have_V8 = True;
1660 if (archlevel == 7) {
1661 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1662 have_V8 = False;
1663 } else {
1664 __asm__ __volatile__(".word 0xF3044F54"); /* VMAXNM.F32 q2,q2,q2 */
1666 if (have_V8 && have_NEON && have_VFP3) {
1667 archlevel = 8;
1671 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
1672 VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
1673 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1674 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1675 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1677 VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
1678 archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
1679 (Int)have_NEON);
1681 VG_(machine_arm_archlevel) = archlevel;
1683 va = VexArchARM;
1684 vai.endness = VexEndnessLE;
1686 vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
1687 if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
1688 if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
1689 if (have_VFP) vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
1690 if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
1692 VG_(machine_get_cache_info)(&vai);
1694 return True;
1695 }
1697 #elif defined(VGA_arm64)
1698 {
1699 va = VexArchARM64;
1700 vai.endness = VexEndnessLE;
1702 /* So far there are no variants. */
1703 vai.hwcaps = 0;
1705 VG_(machine_get_cache_info)(&vai);
1707 /* Check whether we need to use the fallback LLSC implementation.
1708 If the check fails, give up. */
1709 if (! VG_(parse_cpuinfo)())
1710 return False;
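/* VG_(parse_cpuinfo) inspects /proc/cpuinfo; on arm64 it is also
   expected to set vai.arm64_requires_fallback_LLSC for cores on which
   natively-executed LDXR/STXR (load-linked/store-conditional) pairs do
   not behave reliably under binary translation, AFAICS. The outcome is
   reported in the debug log below. */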
1712 /* 0 denotes 'not set'. The range of legitimate values here,
1713 after being set that is, is 2 through 17 inclusive. */
1714 vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
1715 vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
1716 ULong ctr_el0;
1717 __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
1718 vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
1719 vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >> 0) & 0xF) + 2;
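/* CTR_EL0.DminLine (bits [19:16]) and IminLine (bits [3:0]) encode the
   minimum line sizes as log2(number of 4-byte words), hence the "+ 2" to
   convert to log2(bytes). For example, a DminLine field of 4 means
   1 << (4 + 2) = 64 byte data cache lines. */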
1720 VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
1721 "ctr_el0.iMinLine_szB = %d\n",
1722 1 << vai.arm64_dMinLine_lg2_szB,
1723 1 << vai.arm64_iMinLine_lg2_szB);
1724 VG_(debugLog)(1, "machine", "ARM64: requires_fallback_LLSC: %s\n",
1725 vai.arm64_requires_fallback_LLSC ? "yes" : "no");
1727 return True;
1728 }
1730 #elif defined(VGA_mips32)
1731 {
1732 /* Define the position of F64 bit in FIR register. */
1733 # define FP64 22
1734 va = VexArchMIPS32;
1735 if (!VG_(parse_cpuinfo)())
1736 return False;
1738 # if defined(VKI_LITTLE_ENDIAN)
1739 vai.endness = VexEndnessLE;
1740 # elif defined(VKI_BIG_ENDIAN)
1741 vai.endness = VexEndnessBE;
1742 # else
1743 vai.endness = VexEndness_INVALID;
1744 # endif
1746 /* Same instruction set detection algorithm as for ppc32/arm... */
1747 vki_sigset_t saved_set, tmp_set;
1748 vki_sigaction_fromK_t saved_sigill_act;
1749 vki_sigaction_toK_t tmp_sigill_act;
1751 volatile Bool have_DSP, have_DSPr2, have_MSA;
1752 Int r;
1754 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1756 VG_(sigemptyset)(&tmp_set);
1757 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1759 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1760 vg_assert(r == 0);
1762 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1763 vg_assert(r == 0);
1764 tmp_sigill_act = saved_sigill_act;
1766 /* NODEFER: signal handler does not return (from the kernel's point of
1767 view), hence if it is to successfully catch a signal more than once,
1768 we need the NODEFER flag. */
1769 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1770 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1771 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1772 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1773 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1775 if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {
1777 /* MSA instructions. */
1778 have_MSA = True;
1779 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1780 have_MSA = False;
1781 } else {
1782 __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
1783 }
1784 if (have_MSA) {
1785 vai.hwcaps |= VEX_PRID_IMP_P5600;
1786 } else {
1787 /* DSPr2 instructions. */
1788 have_DSPr2 = True;
1789 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1790 have_DSPr2 = False;
1791 } else {
1792 __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
1793 }
1794 if (have_DSPr2) {
1795 /* We assume it's 74K, since it can run DSPr2. */
1796 vai.hwcaps |= VEX_PRID_IMP_74K;
1797 } else {
1798 /* DSP instructions. */
1799 have_DSP = True;
1800 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1801 have_DSP = False;
1802 } else {
1803 __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
1804 }
1805 if (have_DSP) {
1806 /* We assume it's 34K, since it has support for DSP. */
1807 vai.hwcaps |= VEX_PRID_IMP_34K;
1808 }
1809 }
1810 }
1811 }
1813 # if defined(VGP_mips32_linux)
1814 Int fpmode = VG_(prctl)(VKI_PR_GET_FP_MODE, 0, 0, 0, 0);
1815 # else
1816 Int fpmode = -1;
1817 # endif
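/* fpmode reflects the FPU register model (the CP0 Status.FR bit): zero
   means the FP registers are 32 bits wide and get paired up to hold
   doubles (FR=0), non-zero means every FP register is a full 64 bits
   (FR=1). prctl(PR_GET_FP_MODE) reports this directly where the kernel
   supports it; otherwise it fails and the asm probe below is used,
   AFAICS. */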
1819 if (fpmode < 0) {
1820 /* prctl(PR_GET_FP_MODE) is not supported by the kernel,
1821 so we use an alternative way to determine the FP mode */
1822 ULong result = 0;
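/* The probe relies on register pairing: 'result' (zero) is loaded into
   $f0 as a double, then 0x3FF00000 is written to $f1 and $f0 is stored
   back. With FR=0 the odd register $f1 holds the upper half of the
   $f0/$f1 pair, so the stored value becomes 0x3FF0000000000000 (1.0);
   with FR=1 the registers are independent and 'result' stays zero.
   Hence the test below sets fpmode to 1 exactly when the value was left
   untouched, i.e. when the host runs with 64-bit FP registers, AFAICS. */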
1824 if (!VG_MINIMAL_SETJMP(env_unsup_insn)) {
1825 __asm__ volatile (
1826 ".set push\n\t"
1827 ".set noreorder\n\t"
1828 ".set oddspreg\n\t"
1829 ".set hardfloat\n\t"
1830 "lui $t0, 0x3FF0\n\t"
1831 "ldc1 $f0, %0\n\t"
1832 "mtc1 $t0, $f1\n\t"
1833 "sdc1 $f0, %0\n\t"
1834 ".set pop\n\t"
1835 : "+m"(result)
1837 : "t0", "$f0", "$f1", "memory");
1839 fpmode = (result != 0x3FF0000000000000ull);
1840 }
1841 }
1843 if (fpmode != 0)
1844 vai.hwcaps |= VEX_MIPS_HOST_FR;
1846 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
1847 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1848 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1850 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
1851 VG_(machine_get_cache_info)(&vai);
1853 return True;
1854 }
1856 #elif defined(VGA_mips64)
1857 {
1858 va = VexArchMIPS64;
1859 if (!VG_(parse_cpuinfo)())
1860 return False;
1862 # if defined(VKI_LITTLE_ENDIAN)
1863 vai.endness = VexEndnessLE;
1864 # elif defined(VKI_BIG_ENDIAN)
1865 vai.endness = VexEndnessBE;
1866 # else
1867 vai.endness = VexEndness_INVALID;
1868 # endif
1870 vai.hwcaps |= VEX_MIPS_HOST_FR;
1872 /* Same instruction set detection algorithm as for ppc32/arm... */
1873 vki_sigset_t saved_set, tmp_set;
1874 vki_sigaction_fromK_t saved_sigill_act;
1875 vki_sigaction_toK_t tmp_sigill_act;
1877 volatile Bool have_MSA;
1878 Int r;
1880 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1882 VG_(sigemptyset)(&tmp_set);
1883 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1885 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1886 vg_assert(r == 0);
1888 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1889 vg_assert(r == 0);
1890 tmp_sigill_act = saved_sigill_act;
1892 /* NODEFER: signal handler does not return (from the kernel's point of
1893 view), hence if it is to successfully catch a signal more than once,
1894 we need the NODEFER flag. */
1895 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1896 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1897 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1898 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1899 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1901 if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {
1903 /* MSA instructions */
1904 have_MSA = True;
1905 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1906 have_MSA = False;
1907 } else {
1908 __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
1909 }
1910 if (have_MSA) {
1911 vai.hwcaps |= VEX_PRID_IMP_P5600;
1912 }
1913 }
1915 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
1916 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1917 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1919 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
1921 VG_(machine_get_cache_info)(&vai);
1923 return True;
1924 }
1926 #else
1927 # error "Unknown arch"
1928 #endif
1929 }
1931 /* Notify host cpu instruction cache line size. */
1932 #if defined(VGA_ppc32)
1933 void VG_(machine_ppc32_set_clszB)( Int szB )
1934 {
1935 vg_assert(hwcaps_done);
1937 /* Either the value must not have been set yet (zero) or we can
1938 tolerate it being set to the same value multiple times, as the
1939 stack scanning logic in m_main is a bit stupid. */
1940 vg_assert(vai.ppc_icache_line_szB == 0
1941 || vai.ppc_icache_line_szB == szB);
1943 vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
1944 vai.ppc_icache_line_szB = szB;
1945 }
1946 #endif
1949 /* Notify host cpu instruction cache line size. */
1950 #if defined(VGA_ppc64be)|| defined(VGA_ppc64le)
1951 void VG_(machine_ppc64_set_clszB)( Int szB )
1952 {
1953 vg_assert(hwcaps_done);
1955 /* Either the value must not have been set yet (zero) or we can
1956 tolerate it being set to the same value multiple times, as the
1957 stack scanning logic in m_main is a bit stupid. */
1958 vg_assert(vai.ppc_icache_line_szB == 0
1959 || vai.ppc_icache_line_szB == szB);
1961 vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
1962 vai.ppc_icache_line_szB = szB;
1963 }
1964 #endif
1967 /* Notify host's ability to handle NEON instructions. */
1968 #if defined(VGA_arm)
1969 void VG_(machine_arm_set_has_NEON)( Bool has_neon )
1970 {
1971 vg_assert(hwcaps_done);
1972 /* There's nothing else we can sanity check. */
1974 if (has_neon) {
1975 vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
1976 } else {
1977 vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
1980 #endif
1983 /* Fetch host cpu info, once established. */
1984 void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
1985 /*OUT*/VexArchInfo* pVai )
1986 {
1987 vg_assert(hwcaps_done);
1988 if (pVa) *pVa = va;
1989 if (pVai) *pVai = vai;
1990 }
1993 /* Returns the size of the largest guest register that we will
1994 simulate in this run. This depends on both the guest architecture
1995 and on the specific capabilities we are simulating for that guest
1996 (eg, AVX or non-AVX ?, for amd64). Should return either 4, 8, 16
1997 or 32. General rule: if in doubt, return a value larger than
1998 reality.
2000 This information is needed by Cachegrind and Callgrind to decide
2001 what the minimum cache line size they are prepared to simulate is.
2002 Basically require that the minimum cache line size is at least as
2003 large as the largest register that might get transferred to/from
2004 memory, so as to guarantee that any such transaction can straddle
2005 at most 2 cache lines.
2006 */
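/* Worked example: with 32-byte guest registers (AVX) the simulated line
   size must be at least 32 bytes. A misaligned 32-byte access then
   touches at most 2 lines, whereas with 16-byte lines it could touch 3,
   which the cache simulators are not prepared to handle. */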
2007 Int VG_(machine_get_size_of_largest_guest_register) ( void )
2008 {
2009 vg_assert(hwcaps_done);
2010 /* Once hwcaps_done is True, we can fish around inside va/vai to
2011 find the information we need. */
2013 # if defined(VGA_x86)
2014 vg_assert(va == VexArchX86);
2015 /* We don't support AVX, so 32 is out. At the other end, even if
2016 we don't support any SSE, the X87 can generate 10 byte
2017 transfers, so let's say 16 to be on the safe side. Hence the
2018 answer is always 16. */
2019 return 16;
2021 # elif defined(VGA_amd64)
2022 /* if AVX then 32 else 16 */
2023 return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;
2025 # elif defined(VGA_ppc32)
2026 /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
2027 if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
2028 if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
2029 if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
2030 return 8;
2032 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
2033 /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
2034 if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
2035 if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
2036 if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
2037 return 8;
2039 # elif defined(VGA_s390x)
2040 return 8;
2042 # elif defined(VGA_arm)
2043 /* Really it depends on whether or not we have NEON, but let's just
2044 assume we always do. */
2045 return 16;
2047 # elif defined(VGA_arm64)
2048 /* ARM64 always has Neon, AFAICS. */
2049 return 16;
2051 # elif defined(VGA_mips32)
2052 /* The guest state implies 4, but that can't really be true, can
2053 it? */
2054 return 8;
2056 # elif defined(VGA_mips64)
2057 return 8;
2059 # else
2060 # error "Unknown arch"
2061 # endif
2062 }
2065 // Given a pointer to a function as obtained by "& functionname" in C,
2066 // produce a pointer to the actual entry point for the function.
2067 void* VG_(fnptr_to_fnentry)( void* f )
2068 {
2069 # if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \
2070 || defined(VGP_arm_linux) || defined(VGO_darwin) \
2071 || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
2072 || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
2073 || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
2074 || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris)
2075 return f;
2076 # elif defined(VGP_ppc64be_linux)
2077 /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
2078 3-word function descriptor, of which the first word is the entry
2079 address. */
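/* For reference, the ELFv1 descriptor layout is, AFAICS:
   descr[0] = entry address, descr[1] = TOC (r2) value for the callee,
   descr[2] = environment pointer (unused by C). Only descr[0] is needed
   here. */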
2080 UWord* descr = (UWord*)f;
2081 return (void*)(descr[0]);
2082 # else
2083 # error "Unknown platform"
2084 # endif
2085 }
2087 /*--------------------------------------------------------------------*/
2088 /*--- end ---*/
2089 /*--------------------------------------------------------------------*/