FreeBSD Helgrind: turn off check for locks held on exit for FreeBSD 14.2
[valgrind.git] / coregrind / m_machine.c
blob234efb312d2c4a7c863ff8ad6b059354ade86a7a
1 /*--------------------------------------------------------------------*/
2 /*--- Machine-related stuff. m_machine.c ---*/
3 /*--------------------------------------------------------------------*/
5 /*
6 This file is part of Valgrind, a dynamic binary instrumentation
7 framework.
9 Copyright (C) 2000-2017 Julian Seward
10 jseward@acm.org
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, see <http://www.gnu.org/licenses/>.
   The GNU General Public License is contained in the file COPYING.
*/
28 #include "pub_core_basics.h"
29 #include "pub_core_vki.h"
30 #include "pub_core_threadstate.h"
31 #include "pub_core_libcassert.h"
32 #include "pub_core_libcbase.h"
33 #include "pub_core_libcprint.h"
34 #include "pub_core_libcfile.h"
35 #include "pub_core_libcprint.h"
36 #include "pub_core_libcproc.h"
37 #include "pub_core_mallocfree.h"
38 #include "pub_core_machine.h"
39 #include "pub_core_cpuid.h"
40 #include "pub_core_libcsignal.h" // for ppc32 messing with SIGILL and SIGFPE
41 #include "pub_core_debuglog.h"
/* Shorthand for the guest instruction-, stack- and frame-pointer fields
   inside a thread's arch state.  The actual field names are supplied by
   VG_INSTR_PTR / VG_STACK_PTR / VG_FRAME_PTR, which are defined outside
   this file. */
#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

/* As STACK_PTR, but selecting the stack pointer held in the first shadow
   register set (vex_shadow1). */
#define STACK_PTR_S1(regs) ((regs).vex_shadow1.VG_STACK_PTR)
50 Addr VG_(get_IP) ( ThreadId tid ) {
51 return INSTR_PTR( VG_(threads)[tid].arch );
53 Addr VG_(get_SP) ( ThreadId tid ) {
54 return STACK_PTR( VG_(threads)[tid].arch );
56 Addr VG_(get_FP) ( ThreadId tid ) {
57 return FRAME_PTR( VG_(threads)[tid].arch );
60 Addr VG_(get_SP_s1) ( ThreadId tid ) {
61 return STACK_PTR_S1( VG_(threads)[tid].arch );
63 void VG_(set_SP_s1) ( ThreadId tid, Addr sp ) {
64 STACK_PTR_S1( VG_(threads)[tid].arch ) = sp;
67 void VG_(set_IP) ( ThreadId tid, Addr ip ) {
68 INSTR_PTR( VG_(threads)[tid].arch ) = ip;
70 void VG_(set_SP) ( ThreadId tid, Addr sp ) {
71 STACK_PTR( VG_(threads)[tid].arch ) = sp;
74 void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
75 ThreadId tid )
77 # if defined(VGA_x86)
78 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
79 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
80 regs->misc.X86.r_ebp
81 = VG_(threads)[tid].arch.vex.guest_EBP;
82 # elif defined(VGA_amd64)
83 regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
84 regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
85 regs->misc.AMD64.r_rbp
86 = VG_(threads)[tid].arch.vex.guest_RBP;
87 # elif defined(VGA_ppc32)
88 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
89 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
90 regs->misc.PPC32.r_lr
91 = VG_(threads)[tid].arch.vex.guest_LR;
92 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
93 regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
94 regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
95 regs->misc.PPC64.r_lr
96 = VG_(threads)[tid].arch.vex.guest_LR;
97 # elif defined(VGA_arm)
98 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
99 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
100 regs->misc.ARM.r14
101 = VG_(threads)[tid].arch.vex.guest_R14;
102 regs->misc.ARM.r12
103 = VG_(threads)[tid].arch.vex.guest_R12;
104 regs->misc.ARM.r11
105 = VG_(threads)[tid].arch.vex.guest_R11;
106 regs->misc.ARM.r7
107 = VG_(threads)[tid].arch.vex.guest_R7;
108 # elif defined(VGA_arm64)
109 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
110 regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
111 regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
112 regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
113 # elif defined(VGA_s390x)
114 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
115 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
116 regs->misc.S390X.r_fp
117 = VG_(threads)[tid].arch.vex.guest_FP;
118 regs->misc.S390X.r_lr
119 = VG_(threads)[tid].arch.vex.guest_LR;
120 /* ANDREAS 3 Apr 2019 FIXME r_f0..r_f7: is this correct? */
121 regs->misc.S390X.r_f0
122 = VG_(threads)[tid].arch.vex.guest_v0.w64[0];
123 regs->misc.S390X.r_f1
124 = VG_(threads)[tid].arch.vex.guest_v1.w64[0];
125 regs->misc.S390X.r_f2
126 = VG_(threads)[tid].arch.vex.guest_v2.w64[0];
127 regs->misc.S390X.r_f3
128 = VG_(threads)[tid].arch.vex.guest_v3.w64[0];
129 regs->misc.S390X.r_f4
130 = VG_(threads)[tid].arch.vex.guest_v4.w64[0];
131 regs->misc.S390X.r_f5
132 = VG_(threads)[tid].arch.vex.guest_v5.w64[0];
133 regs->misc.S390X.r_f6
134 = VG_(threads)[tid].arch.vex.guest_v6.w64[0];
135 regs->misc.S390X.r_f7
136 = VG_(threads)[tid].arch.vex.guest_v7.w64[0];
137 # elif defined(VGA_mips32) || defined(VGP_nanomips_linux)
138 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
139 regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
140 regs->misc.MIPS32.r30
141 = VG_(threads)[tid].arch.vex.guest_r30;
142 regs->misc.MIPS32.r31
143 = VG_(threads)[tid].arch.vex.guest_r31;
144 regs->misc.MIPS32.r28
145 = VG_(threads)[tid].arch.vex.guest_r28;
146 # elif defined(VGA_mips64)
147 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
148 regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
149 regs->misc.MIPS64.r30
150 = VG_(threads)[tid].arch.vex.guest_r30;
151 regs->misc.MIPS64.r31
152 = VG_(threads)[tid].arch.vex.guest_r31;
153 regs->misc.MIPS64.r28
154 = VG_(threads)[tid].arch.vex.guest_r28;
155 # else
156 # error "Unknown arch"
157 # endif
160 void
161 VG_(get_shadow_regs_area) ( ThreadId tid,
162 /*DST*/UChar* dst,
163 /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
165 void* src;
166 ThreadState* tst;
167 vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
168 vg_assert(VG_(is_valid_tid)(tid));
169 // Bounds check
170 vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
171 vg_assert(offset + size <= sizeof(VexGuestArchState));
172 // Copy
173 tst = & VG_(threads)[tid];
174 src = NULL;
175 switch (shadowNo) {
176 case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
177 case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
178 case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
180 vg_assert(src != NULL);
181 VG_(memcpy)( dst, src, size);
184 void
185 VG_(set_shadow_regs_area) ( ThreadId tid,
186 /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
187 /*SRC*/const UChar* src )
189 void* dst;
190 ThreadState* tst;
191 vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
192 vg_assert(VG_(is_valid_tid)(tid));
193 // Bounds check
194 vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
195 vg_assert(offset + size <= sizeof(VexGuestArchState));
196 // Copy
197 tst = & VG_(threads)[tid];
198 dst = NULL;
199 switch (shadowNo) {
200 case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
201 case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
202 case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
204 vg_assert(dst != NULL);
205 VG_(memcpy)( dst, src, size);
209 static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
210 const HChar*, Addr))
212 VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
213 VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
214 #if defined(VGA_x86)
215 (*f)(tid, "EAX", vex->guest_EAX);
216 (*f)(tid, "ECX", vex->guest_ECX);
217 (*f)(tid, "EDX", vex->guest_EDX);
218 (*f)(tid, "EBX", vex->guest_EBX);
219 (*f)(tid, "ESI", vex->guest_ESI);
220 (*f)(tid, "EDI", vex->guest_EDI);
221 (*f)(tid, "ESP", vex->guest_ESP);
222 (*f)(tid, "EBP", vex->guest_EBP);
223 #elif defined(VGA_amd64)
224 (*f)(tid, "RAX", vex->guest_RAX);
225 (*f)(tid, "RCX", vex->guest_RCX);
226 (*f)(tid, "RDX", vex->guest_RDX);
227 (*f)(tid, "RBX", vex->guest_RBX);
228 (*f)(tid, "RSI", vex->guest_RSI);
229 (*f)(tid, "RDI", vex->guest_RDI);
230 (*f)(tid, "RSP", vex->guest_RSP);
231 (*f)(tid, "RBP", vex->guest_RBP);
232 (*f)(tid, "R8" , vex->guest_R8 );
233 (*f)(tid, "R9" , vex->guest_R9 );
234 (*f)(tid, "R10", vex->guest_R10);
235 (*f)(tid, "R11", vex->guest_R11);
236 (*f)(tid, "R12", vex->guest_R12);
237 (*f)(tid, "R13", vex->guest_R13);
238 (*f)(tid, "R14", vex->guest_R14);
239 (*f)(tid, "R15", vex->guest_R15);
240 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
241 (*f)(tid, "GPR0" , vex->guest_GPR0 );
242 (*f)(tid, "GPR1" , vex->guest_GPR1 );
243 (*f)(tid, "GPR2" , vex->guest_GPR2 );
244 (*f)(tid, "GPR3" , vex->guest_GPR3 );
245 (*f)(tid, "GPR4" , vex->guest_GPR4 );
246 (*f)(tid, "GPR5" , vex->guest_GPR5 );
247 (*f)(tid, "GPR6" , vex->guest_GPR6 );
248 (*f)(tid, "GPR7" , vex->guest_GPR7 );
249 (*f)(tid, "GPR8" , vex->guest_GPR8 );
250 (*f)(tid, "GPR9" , vex->guest_GPR9 );
251 (*f)(tid, "GPR10", vex->guest_GPR10);
252 (*f)(tid, "GPR11", vex->guest_GPR11);
253 (*f)(tid, "GPR12", vex->guest_GPR12);
254 (*f)(tid, "GPR13", vex->guest_GPR13);
255 (*f)(tid, "GPR14", vex->guest_GPR14);
256 (*f)(tid, "GPR15", vex->guest_GPR15);
257 (*f)(tid, "GPR16", vex->guest_GPR16);
258 (*f)(tid, "GPR17", vex->guest_GPR17);
259 (*f)(tid, "GPR18", vex->guest_GPR18);
260 (*f)(tid, "GPR19", vex->guest_GPR19);
261 (*f)(tid, "GPR20", vex->guest_GPR20);
262 (*f)(tid, "GPR21", vex->guest_GPR21);
263 (*f)(tid, "GPR22", vex->guest_GPR22);
264 (*f)(tid, "GPR23", vex->guest_GPR23);
265 (*f)(tid, "GPR24", vex->guest_GPR24);
266 (*f)(tid, "GPR25", vex->guest_GPR25);
267 (*f)(tid, "GPR26", vex->guest_GPR26);
268 (*f)(tid, "GPR27", vex->guest_GPR27);
269 (*f)(tid, "GPR28", vex->guest_GPR28);
270 (*f)(tid, "GPR29", vex->guest_GPR29);
271 (*f)(tid, "GPR30", vex->guest_GPR30);
272 (*f)(tid, "GPR31", vex->guest_GPR31);
273 (*f)(tid, "CTR" , vex->guest_CTR );
274 (*f)(tid, "LR" , vex->guest_LR );
275 #elif defined(VGA_arm)
276 (*f)(tid, "R0" , vex->guest_R0 );
277 (*f)(tid, "R1" , vex->guest_R1 );
278 (*f)(tid, "R2" , vex->guest_R2 );
279 (*f)(tid, "R3" , vex->guest_R3 );
280 (*f)(tid, "R4" , vex->guest_R4 );
281 (*f)(tid, "R5" , vex->guest_R5 );
282 (*f)(tid, "R6" , vex->guest_R6 );
283 (*f)(tid, "R8" , vex->guest_R8 );
284 (*f)(tid, "R9" , vex->guest_R9 );
285 (*f)(tid, "R10", vex->guest_R10);
286 (*f)(tid, "R11", vex->guest_R11);
287 (*f)(tid, "R12", vex->guest_R12);
288 (*f)(tid, "R13", vex->guest_R13);
289 (*f)(tid, "R14", vex->guest_R14);
290 #elif defined(VGA_s390x)
291 (*f)(tid, "r0" , vex->guest_r0 );
292 (*f)(tid, "r1" , vex->guest_r1 );
293 (*f)(tid, "r2" , vex->guest_r2 );
294 (*f)(tid, "r3" , vex->guest_r3 );
295 (*f)(tid, "r4" , vex->guest_r4 );
296 (*f)(tid, "r5" , vex->guest_r5 );
297 (*f)(tid, "r6" , vex->guest_r6 );
298 (*f)(tid, "r7" , vex->guest_r7 );
299 (*f)(tid, "r8" , vex->guest_r8 );
300 (*f)(tid, "r9" , vex->guest_r9 );
301 (*f)(tid, "r10", vex->guest_r10);
302 (*f)(tid, "r11", vex->guest_r11);
303 (*f)(tid, "r12", vex->guest_r12);
304 (*f)(tid, "r13", vex->guest_r13);
305 (*f)(tid, "r14", vex->guest_r14);
306 (*f)(tid, "r15", vex->guest_r15);
307 #elif defined(VGA_mips32) || defined(VGA_mips64) || defined(VGP_nanomips_linux)
308 (*f)(tid, "r0" , vex->guest_r0 );
309 (*f)(tid, "r1" , vex->guest_r1 );
310 (*f)(tid, "r2" , vex->guest_r2 );
311 (*f)(tid, "r3" , vex->guest_r3 );
312 (*f)(tid, "r4" , vex->guest_r4 );
313 (*f)(tid, "r5" , vex->guest_r5 );
314 (*f)(tid, "r6" , vex->guest_r6 );
315 (*f)(tid, "r7" , vex->guest_r7 );
316 (*f)(tid, "r8" , vex->guest_r8 );
317 (*f)(tid, "r9" , vex->guest_r9 );
318 (*f)(tid, "r10", vex->guest_r10);
319 (*f)(tid, "r11", vex->guest_r11);
320 (*f)(tid, "r12", vex->guest_r12);
321 (*f)(tid, "r13", vex->guest_r13);
322 (*f)(tid, "r14", vex->guest_r14);
323 (*f)(tid, "r15", vex->guest_r15);
324 (*f)(tid, "r16", vex->guest_r16);
325 (*f)(tid, "r17", vex->guest_r17);
326 (*f)(tid, "r18", vex->guest_r18);
327 (*f)(tid, "r19", vex->guest_r19);
328 (*f)(tid, "r20", vex->guest_r20);
329 (*f)(tid, "r21", vex->guest_r21);
330 (*f)(tid, "r22", vex->guest_r22);
331 (*f)(tid, "r23", vex->guest_r23);
332 (*f)(tid, "r24", vex->guest_r24);
333 (*f)(tid, "r25", vex->guest_r25);
334 (*f)(tid, "r26", vex->guest_r26);
335 (*f)(tid, "r27", vex->guest_r27);
336 (*f)(tid, "r28", vex->guest_r28);
337 (*f)(tid, "r29", vex->guest_r29);
338 (*f)(tid, "r30", vex->guest_r30);
339 (*f)(tid, "r31", vex->guest_r31);
340 #elif defined(VGA_arm64)
341 (*f)(tid, "x0" , vex->guest_X0 );
342 (*f)(tid, "x1" , vex->guest_X1 );
343 (*f)(tid, "x2" , vex->guest_X2 );
344 (*f)(tid, "x3" , vex->guest_X3 );
345 (*f)(tid, "x4" , vex->guest_X4 );
346 (*f)(tid, "x5" , vex->guest_X5 );
347 (*f)(tid, "x6" , vex->guest_X6 );
348 (*f)(tid, "x7" , vex->guest_X7 );
349 (*f)(tid, "x8" , vex->guest_X8 );
350 (*f)(tid, "x9" , vex->guest_X9 );
351 (*f)(tid, "x10", vex->guest_X10);
352 (*f)(tid, "x11", vex->guest_X11);
353 (*f)(tid, "x12", vex->guest_X12);
354 (*f)(tid, "x13", vex->guest_X13);
355 (*f)(tid, "x14", vex->guest_X14);
356 (*f)(tid, "x15", vex->guest_X15);
357 (*f)(tid, "x16", vex->guest_X16);
358 (*f)(tid, "x17", vex->guest_X17);
359 (*f)(tid, "x18", vex->guest_X18);
360 (*f)(tid, "x19", vex->guest_X19);
361 (*f)(tid, "x20", vex->guest_X20);
362 (*f)(tid, "x21", vex->guest_X21);
363 (*f)(tid, "x22", vex->guest_X22);
364 (*f)(tid, "x23", vex->guest_X23);
365 (*f)(tid, "x24", vex->guest_X24);
366 (*f)(tid, "x25", vex->guest_X25);
367 (*f)(tid, "x26", vex->guest_X26);
368 (*f)(tid, "x27", vex->guest_X27);
369 (*f)(tid, "x28", vex->guest_X28);
370 (*f)(tid, "x29", vex->guest_X29);
371 (*f)(tid, "x30", vex->guest_X30);
372 #else
373 # error Unknown arch
374 #endif
378 void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
380 ThreadId tid;
382 for (tid = 1; tid < VG_N_THREADS; tid++) {
383 if (VG_(is_valid_tid)(tid)
384 || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
385 // live thread or thread instructed to die by another thread that
386 // called exit.
387 apply_to_GPs_of_tid(tid, f);
392 void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
394 *tid = (ThreadId)(-1);
397 Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
398 /*OUT*/Addr* stack_min,
399 /*OUT*/Addr* stack_max)
401 ThreadId i;
402 for (i = (*tid)+1; i < VG_N_THREADS; i++) {
403 if (i == VG_INVALID_THREADID)
404 continue;
405 if (VG_(threads)[i].status != VgTs_Empty) {
406 *tid = i;
407 *stack_min = VG_(get_SP)(i);
408 *stack_max = VG_(threads)[i].client_stack_highest_byte;
409 return True;
412 return False;
415 Addr VG_(thread_get_stack_max)(ThreadId tid)
417 vg_assert(tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
418 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
419 return VG_(threads)[tid].client_stack_highest_byte;
422 SizeT VG_(thread_get_stack_size)(ThreadId tid)
424 vg_assert(tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
425 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
426 return VG_(threads)[tid].client_stack_szB;
429 Addr VG_(thread_get_altstack_min)(ThreadId tid)
431 vg_assert(tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
432 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
433 return (Addr)VG_(threads)[tid].altstack.ss_sp;
436 SizeT VG_(thread_get_altstack_size)(ThreadId tid)
438 vg_assert(tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
439 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
440 return VG_(threads)[tid].altstack.ss_size;
443 //-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/
456 /* --------- State --------- */
/* Set to True on entry to VG_(machine_get_hwcaps), which first asserts
   that it is still False -- so hwcaps detection runs at most once. */
static Bool hwcaps_done = False;

/* --- all archs --- */
/* Host architecture and its detailed description.  VG_(machine_get_hwcaps)
   loads vai with the LibVEX defaults and then fills in whatever it
   detects. */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
/* Set to 1 by VG_(machine_get_hwcaps) when the host has at least SSE1,
   otherwise left at / set to 0. */
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
/* NOTE(review): presumably set during ppc32 hwcaps detection; the code
   that does so is not visible in this part of the file. */
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
/* NOTE(review): presumably set during ppc64 hwcaps detection; the code
   that does so is not visible in this part of the file. */
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
/* Starts out as 4.  NOTE(review): presumably raised during ARM hwcaps
   detection; the code that does so is not visible in this part of the
   file. */
Int VG_(machine_arm_archlevel) = 4;
#endif
478 /* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
479 testing, so we need a VG_MINIMAL_JMP_BUF. */
480 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
481 || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32) \
482 || defined(VGA_mips64) || defined(VGA_arm64)
483 #include "pub_core_libcsetjmp.h"
484 static VG_MINIMAL_JMP_BUF(env_unsup_insn);
485 static void handler_unsup_insn ( Int x ) {
486 VG_MINIMAL_LONGJMP(env_unsup_insn);
488 #endif
491 /* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
492 * handlers are installed. Determines the sizes affected by dcbz
493 * and dcbzl instructions and updates the given VexArchInfo structure
494 * accordingly.
496 * Not very defensive: assumes that as long as the dcbz/dcbzl
497 * instructions don't raise a SIGILL, that they will zero an aligned,
498 * contiguous block of memory of a sensible size. */
499 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
500 static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
502 Int dcbz_szB = 0;
503 Int dcbzl_szB;
504 # define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
505 char test_block[4*MAX_DCBZL_SZB];
506 char *aligned = test_block;
507 Int i;
509 /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
510 aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
511 vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);
513 /* dcbz often clears 32B, although sometimes whatever the native cache
514 * block size is */
515 VG_(memset)(test_block, 0xff, sizeof(test_block));
516 __asm__ __volatile__("dcbz 0,%0"
517 : /*out*/
518 : "r" (aligned) /*in*/
519 : "memory" /*clobber*/);
520 for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
521 if (!test_block[i])
522 ++dcbz_szB;
524 vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);
526 /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
527 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
528 dcbzl_szB = 0; /* indicates unsupported */
530 else {
531 VG_(memset)(test_block, 0xff, sizeof(test_block));
532 /* some older assemblers won't understand the dcbzl instruction
533 * variant, so we directly emit the instruction ourselves */
534 __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
535 : /*out*/
536 : "r" (aligned) /*in*/
537 : "memory", "r9" /*clobber*/);
538 for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
539 if (!test_block[i])
540 ++dcbzl_szB;
542 vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
545 arch_info->ppc_dcbz_szB = dcbz_szB;
546 arch_info->ppc_dcbzl_szB = dcbzl_szB;
548 VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
549 dcbz_szB, dcbzl_szB);
550 # undef MAX_DCBZL_SZB
552 #endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */
554 #ifdef VGA_s390x
556 /* Read /proc/cpuinfo. Look for lines like these
558 processor 0: version = FF, identification = 0117C9, machine = 2064
560 and return the machine model. If the machine model could not be determined
561 or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */
563 static UInt VG_(get_machine_model)(void)
565 static struct model_map {
566 const HChar name[5];
567 UInt id;
568 } model_map[] = {
569 { "2064", VEX_S390X_MODEL_Z900 },
570 { "2066", VEX_S390X_MODEL_Z800 },
571 { "2084", VEX_S390X_MODEL_Z990 },
572 { "2086", VEX_S390X_MODEL_Z890 },
573 { "2094", VEX_S390X_MODEL_Z9_EC },
574 { "2096", VEX_S390X_MODEL_Z9_BC },
575 { "2097", VEX_S390X_MODEL_Z10_EC },
576 { "2098", VEX_S390X_MODEL_Z10_BC },
577 { "2817", VEX_S390X_MODEL_Z196 },
578 { "2818", VEX_S390X_MODEL_Z114 },
579 { "2827", VEX_S390X_MODEL_ZEC12 },
580 { "2828", VEX_S390X_MODEL_ZBC12 },
581 { "2964", VEX_S390X_MODEL_Z13 },
582 { "2965", VEX_S390X_MODEL_Z13S },
583 { "3906", VEX_S390X_MODEL_Z14 },
584 { "3907", VEX_S390X_MODEL_Z14_ZR1 },
585 { "8561", VEX_S390X_MODEL_Z15 },
586 { "8562", VEX_S390X_MODEL_Z15 },
587 { "3931", VEX_S390X_MODEL_Z16 },
588 { "3932", VEX_S390X_MODEL_Z16 },
591 Int model, n, fh;
592 SysRes fd;
593 SizeT num_bytes, file_buf_size;
594 HChar *p, *m, *model_name, *file_buf;
596 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
597 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
598 if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;
600 fh = sr_Res(fd);
602 /* Determine the size of /proc/cpuinfo.
603 Work around broken-ness in /proc file system implementation.
604 fstat returns a zero size for /proc/cpuinfo although it is
605 claimed to be a regular file. */
606 num_bytes = 0;
607 file_buf_size = 1000;
608 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
609 while (42) {
610 n = VG_(read)(fh, file_buf, file_buf_size);
611 if (n < 0) break;
613 num_bytes += n;
614 if (n < file_buf_size) break; /* reached EOF */
617 if (n < 0) num_bytes = 0; /* read error; ignore contents */
619 if (num_bytes > file_buf_size) {
620 VG_(free)( file_buf );
621 VG_(lseek)( fh, 0, VKI_SEEK_SET );
622 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
623 n = VG_(read)( fh, file_buf, num_bytes );
624 if (n < 0) num_bytes = 0;
627 file_buf[num_bytes] = '\0';
628 VG_(close)(fh);
630 /* Parse file */
631 model = VEX_S390X_MODEL_UNKNOWN;
632 for (p = file_buf; *p; ++p) {
633 /* Beginning of line */
634 if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;
636 m = VG_(strstr)( p, "machine" );
637 if (m == NULL) continue;
639 p = m + sizeof "machine" - 1;
640 while ( VG_(isspace)( *p ) || *p == '=') {
641 if (*p == '\n') goto next_line;
642 ++p;
645 model_name = p;
646 for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
647 struct model_map *mm = model_map + n;
648 SizeT len = VG_(strlen)( mm->name );
649 if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
650 VG_(isspace)( model_name[len] )) {
651 if (mm->id < model) model = mm->id;
652 p = model_name + len;
653 break;
656 /* Skip until end-of-line */
657 while (*p != '\n')
658 ++p;
659 next_line: ;
662 VG_(free)( file_buf );
663 VG_(debugLog)(1, "machine", "model = %s\n",
664 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
665 : model_map[model].name);
666 return model;
669 #endif /* defined(VGA_s390x) */
671 #if defined(VGA_mips32) || defined(VGA_mips64)
/*
 * Initialize hwcaps by parsing /proc/cpuinfo. Returns False if it cannot
 * determine what CPU it is (it searches only for the models that are or may be
 * supported by Valgrind).
 */
678 static Bool VG_(parse_cpuinfo)(void)
680 const char *search_Broadcom_str = "cpu model\t\t: Broadcom";
681 const char *search_Cavium_str= "cpu model\t\t: Cavium";
682 const char *search_Ingenic_str= "cpu model\t\t: Ingenic";
683 const char *search_Loongson_str= "cpu model\t\t: ICT Loongson";
684 const char *search_MIPS_str = "cpu model\t\t: MIPS";
685 const char *search_Netlogic_str = "cpu model\t\t: Netlogic";
687 Int n, fh;
688 SysRes fd;
689 SizeT num_bytes, file_buf_size;
690 HChar *file_buf, *isa;
692 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
693 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
694 if ( sr_isError(fd) ) return False;
696 fh = sr_Res(fd);
698 /* Determine the size of /proc/cpuinfo.
699 Work around broken-ness in /proc file system implementation.
700 fstat returns a zero size for /proc/cpuinfo although it is
701 claimed to be a regular file. */
702 num_bytes = 0;
703 file_buf_size = 1000;
704 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
705 while (42) {
706 n = VG_(read)(fh, file_buf, file_buf_size);
707 if (n < 0) break;
709 num_bytes += n;
710 if (n < file_buf_size) break; /* reached EOF */
713 if (n < 0) num_bytes = 0; /* read error; ignore contents */
715 if (num_bytes > file_buf_size) {
716 VG_(free)( file_buf );
717 VG_(lseek)( fh, 0, VKI_SEEK_SET );
718 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
719 n = VG_(read)( fh, file_buf, num_bytes );
720 if (n < 0) num_bytes = 0;
723 file_buf[num_bytes] = '\0';
724 VG_(close)(fh);
726 /* Parse file */
727 if (VG_(strstr)(file_buf, search_Broadcom_str) != NULL)
728 vai.hwcaps = VEX_PRID_COMP_BROADCOM;
729 else if (VG_(strstr)(file_buf, search_Netlogic_str) != NULL)
730 vai.hwcaps = VEX_PRID_COMP_NETLOGIC;
731 else if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
732 vai.hwcaps = VEX_PRID_COMP_CAVIUM;
733 else if (VG_(strstr)(file_buf, search_MIPS_str) != NULL)
734 vai.hwcaps = VEX_PRID_COMP_MIPS;
735 else if (VG_(strstr)(file_buf, search_Ingenic_str) != NULL)
736 vai.hwcaps = VEX_PRID_COMP_INGENIC_E1;
737 else if (VG_(strstr)(file_buf, search_Loongson_str) != NULL)
738 vai.hwcaps = (VEX_PRID_COMP_LEGACY | VEX_PRID_IMP_LOONGSON_64);
739 else {
740 /* Did not find string in the proc file. */
741 vai.hwcaps = 0;
742 VG_(free)(file_buf);
743 return False;
746 isa = VG_(strstr)(file_buf, "isa\t\t\t: ");
748 if (NULL != isa) {
749 if (VG_(strstr) (isa, "mips32r1") != NULL)
750 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
751 if (VG_(strstr) (isa, "mips32r2") != NULL)
752 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
753 if (VG_(strstr) (isa, "mips32r6") != NULL)
754 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R6;
755 if (VG_(strstr) (isa, "mips64r1") != NULL)
756 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R1;
757 if (VG_(strstr) (isa, "mips64r2") != NULL)
758 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2;
759 if (VG_(strstr) (isa, "mips64r6") != NULL)
760 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R6;
763 * TODO(petarj): Remove this Cavium workaround once Linux kernel folks
764 * decide to change incorrect settings in
765 * mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h.
766 * The current settings show mips32r1, mips32r2 and mips64r1 as
767 * unsupported ISAs by Cavium MIPS CPUs.
769 if (VEX_MIPS_COMP_ID(vai.hwcaps) == VEX_PRID_COMP_CAVIUM) {
770 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1 | VEX_MIPS_CPU_ISA_M32R2 |
771 VEX_MIPS_CPU_ISA_M64R1;
773 } else {
775 * Kernel does not provide information about supported ISAs.
776 * Populate the isa level flags based on the CPU model. That is our
777 * best guess.
779 switch VEX_MIPS_COMP_ID(vai.hwcaps) {
780 case VEX_PRID_COMP_CAVIUM:
781 case VEX_PRID_COMP_NETLOGIC:
782 vai.hwcaps |= (VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1);
783 /* fallthrough */
784 case VEX_PRID_COMP_INGENIC_E1:
785 case VEX_PRID_COMP_MIPS:
786 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
787 /* fallthrough */
788 case VEX_PRID_COMP_BROADCOM:
789 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
790 break;
791 case VEX_PRID_COMP_LEGACY:
792 if ((VEX_MIPS_PROC_ID(vai.hwcaps) == VEX_PRID_IMP_LOONGSON_64))
793 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1 |
794 VEX_MIPS_CPU_ISA_M32R2 | VEX_MIPS_CPU_ISA_M32R1;
795 break;
796 default:
797 break;
800 VG_(free)(file_buf);
801 return True;
804 #endif /* defined(VGA_mips32) || defined(VGA_mips64) */
806 #if defined(VGP_arm64_linux)
808 /* Check to see whether we are running on a Cavium core, and if so auto-enable
809 the fallback LLSC implementation. See #369459. */
811 static Bool VG_(parse_cpuinfo)(void)
813 const char *search_Cavium_str = "CPU implementer\t: 0x43";
815 Int n, fh;
816 SysRes fd;
817 SizeT num_bytes, file_buf_size;
818 HChar *file_buf;
820 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
821 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
822 if ( sr_isError(fd) ) return False;
824 fh = sr_Res(fd);
826 /* Determine the size of /proc/cpuinfo.
827 Work around broken-ness in /proc file system implementation.
828 fstat returns a zero size for /proc/cpuinfo although it is
829 claimed to be a regular file. */
830 num_bytes = 0;
831 file_buf_size = 1000;
832 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
833 while (42) {
834 n = VG_(read)(fh, file_buf, file_buf_size);
835 if (n < 0) break;
837 num_bytes += n;
838 if (n < file_buf_size) break; /* reached EOF */
841 if (n < 0) num_bytes = 0; /* read error; ignore contents */
843 if (num_bytes > file_buf_size) {
844 VG_(free)( file_buf );
845 VG_(lseek)( fh, 0, VKI_SEEK_SET );
846 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
847 n = VG_(read)( fh, file_buf, num_bytes );
848 if (n < 0) num_bytes = 0;
851 file_buf[num_bytes] = '\0';
852 VG_(close)(fh);
854 /* Parse file */
855 if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
856 vai.arm64_requires_fallback_LLSC = True;
858 VG_(free)(file_buf);
859 return True;
862 #endif /* defined(VGP_arm64_linux) */
864 Bool VG_(machine_get_hwcaps)( void )
866 vg_assert(hwcaps_done == False);
867 hwcaps_done = True;
869 // Whack default settings into vai, so that we only need to fill in
870 // any interesting bits.
871 LibVEX_default_VexArchInfo(&vai);
873 #if defined(VGA_x86)
874 { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
875 UInt eax, ebx, ecx, edx, max_extended;
876 HChar vstr[13];
877 vstr[0] = 0;
879 if (!VG_(has_cpuid)())
880 /* we can't do cpuid at all. Give up. */
881 return False;
883 VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
884 if (eax < 1)
885 /* we can't ask for cpuid(x) for x > 0. Give up. */
886 return False;
888 /* Get processor ID string, and max basic/extended index
889 values. */
890 VG_(memcpy)(&vstr[0], &ebx, 4);
891 VG_(memcpy)(&vstr[4], &edx, 4);
892 VG_(memcpy)(&vstr[8], &ecx, 4);
893 vstr[12] = 0;
895 VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
896 max_extended = eax;
898 /* get capabilities bits into edx */
899 VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
901 have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
902 have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
903 have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */
905 /* cmpxchg8b is a minimum requirement now; if we don't have it we
906 must simply give up. But all CPUs since Pentium-I have it, so
907 that doesn't seem like much of a restriction. */
908 have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
909 if (!have_cx8)
910 return False;
912 #if defined(VGP_x86_freebsd)
913 if (have_sse1 || have_sse2) {
914 Int sc, error;
915 SizeT scl;
916 /* Regardless of whether cpuid says, the OS has to enable SSE first! */
917 scl = sizeof(sc);
918 error = VG_(sysctlbyname)("hw.instruction_sse", &sc, &scl, 0, 0);
919 if (error == -1 || sc != 1) {
920 have_sse1 = 0;
921 have_sse2 = 0;
922 VG_(message)(Vg_UserMsg, "Warning: cpu has SSE, but the OS has not enabled it. Disabling in valgrind!");
925 #endif
926 /* Figure out if this is an AMD that can do MMXEXT. */
927 have_mmxext = False;
928 if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
929 && max_extended >= 0x80000001) {
930 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
931 /* Some older AMD processors support a sse1 subset (Integer SSE). */
932 have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
935 /* Figure out if this is an AMD or Intel that can do LZCNT. */
936 have_lzcnt = False;
937 if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
938 || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
939 && max_extended >= 0x80000001) {
940 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
941 have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
944 /* Intel processors don't define the mmxext extension, but since it
945 is just a sse1 subset always define it when we have sse1. */
946 if (have_sse1)
947 have_mmxext = True;
949 va = VexArchX86;
950 vai.endness = VexEndnessLE;
952 if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
953 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
954 vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
955 vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
956 vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
957 if (have_lzcnt)
958 vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
959 VG_(machine_x86_have_mxcsr) = 1;
960 } else if (have_sse2 && have_sse1 && have_mmxext) {
961 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
962 vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
963 vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
964 if (have_lzcnt)
965 vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
966 VG_(machine_x86_have_mxcsr) = 1;
967 } else if (have_sse1 && have_mmxext) {
968 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
969 vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
970 VG_(machine_x86_have_mxcsr) = 1;
971 } else if (have_mmxext) {
972 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
973 VG_(machine_x86_have_mxcsr) = 0;
974 } else {
975 vai.hwcaps = 0; /*baseline - no sse at all*/
976 VG_(machine_x86_have_mxcsr) = 0;
979 VG_(machine_get_cache_info)(&vai);
981 return True;
984 #elif defined(VGA_amd64)
985 { Bool have_sse3, have_ssse3, have_cx8, have_cx16;
986 Bool have_lzcnt, have_avx, have_bmi, have_avx2;
987 Bool have_fma3, have_fma4;
988 Bool have_rdtscp, have_rdrand, have_f16c, have_rdseed;
989 UInt eax, ebx, ecx, edx, max_basic, max_extended;
990 ULong xgetbv_0 = 0;
991 HChar vstr[13];
992 vstr[0] = 0;
994 have_sse3 = have_ssse3 = have_cx8 = have_cx16
995 = have_lzcnt = have_avx = have_bmi = have_avx2
996 = have_rdtscp = have_rdrand = have_f16c = have_rdseed
997 = have_fma3 = have_fma4 = False;
999 eax = ebx = ecx = edx = max_basic = max_extended = 0;
1001 if (!VG_(has_cpuid)())
1002 /* we can't do cpuid at all. Give up. */
1003 return False;
1005 VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
1006 max_basic = eax;
1007 if (max_basic < 1)
1008 /* we can't ask for cpuid(x) for x > 0. Give up. */
1009 return False;
1011 /* Get processor ID string, and max basic/extended index
1012 values. */
1013 VG_(memcpy)(&vstr[0], &ebx, 4);
1014 VG_(memcpy)(&vstr[4], &edx, 4);
1015 VG_(memcpy)(&vstr[8], &ecx, 4);
1016 vstr[12] = 0;
1018 VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
1019 max_extended = eax;
1021 /* get capabilities bits into edx */
1022 VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
1024 // we assume that SSE1 and SSE2 are available by default
1025 have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */
1026 have_ssse3 = (ecx & (1<<9)) != 0; /* True => have Sup SSE3 insns */
1027 have_fma3 = (ecx & (1<<12))!= 0; /* True => have fma3 insns */
1028 // sse41 is ecx:19
1029 // sse42 is ecx:20
1030 // xsave is ecx:26
1031 // osxsave is ecx:27
1032 // avx is ecx:28
1033 have_f16c = (ecx & (1<<29)) != 0; /* True => have F16C insns */
1034 have_rdrand = (ecx & (1<<30)) != 0; /* True => have RDRAND insns */
1036 have_avx = False;
1038 if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
1039 /* Processor supports AVX instructions and XGETBV is enabled
1040 by OS and AVX instructions are enabled by the OS. */
1041 ULong w;
1042 __asm__ __volatile__("movq $0,%%rcx ; "
1043 ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
1044 "movq %%rax,%0"
1045 :/*OUT*/"=r"(w) :/*IN*/
1046 :/*TRASH*/"rdx","rcx","rax");
1047 xgetbv_0 = w;
1048 if ((xgetbv_0 & 7) == 7) {
1049 /* Only say we have AVX if the XSAVE-allowable
1050 bitfield-mask allows x87, SSE and AVX state. We could
1051 actually run with a more restrictive XGETBV(0) value,
1052 but VEX's implementation of XSAVE and XRSTOR assumes
1053 that all 3 bits are enabled.
1055 Also, the VEX implementation of XSAVE/XRSTOR assumes that
1056 state component [2] (the YMM high halves) are located in
1057 the XSAVE image at offsets 576 .. 831. So we have to
1058 check that here before declaring AVX to be supported. */
1059 UInt eax2, ebx2, ecx2, edx2;
1060 VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2);
1061 if (ebx2 == 576 && eax2 == 256) {
1062 have_avx = True;
1067 /* cmpxchg8b is a minimum requirement now; if we don't have it we
1068 must simply give up. But all CPUs since Pentium-I have it, so
1069 that doesn't seem like much of a restriction. */
1070 have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
1071 if (!have_cx8)
1072 return False;
1074 /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
1075 have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */
1077 /* Figure out if this CPU can do LZCNT. */
1078 have_lzcnt = False;
1079 if (max_extended >= 0x80000001) {
1080 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
1081 have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
1084 /* Can we do RDTSCP? */
1085 have_rdtscp = False;
1086 if (max_extended >= 0x80000001) {
1087 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
1088 have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSVCP */
1091 if (max_extended >= 0x80000001) {
1092 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
1093 have_fma4= (ecx & (1<<16)) != 0; /* True => have fma4 */
1096 /* Check for BMI1 and AVX2. If we have AVX1 (plus OS support). */
1097 have_bmi = False;
1098 have_avx2 = False;
1099 if (have_avx && max_basic >= 7) {
1100 VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
1101 have_bmi = (ebx & (1<<3)) != 0; /* True => have BMI1 */
1102 have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
1103 have_rdseed = (ebx & (1<<18)) != 0; /* True => have RDSEED insns */
1106 /* Sanity check for RDRAND and F16C. These don't actually *need* AVX, but
1107 it's convenient to restrict them to the AVX case since the simulated
1108 CPUID we'll offer them on has AVX as a base. */
1109 if (!have_avx) {
1110 have_f16c = False;
1111 have_rdrand = False;
1112 have_rdseed = False;
1115 va = VexArchAMD64;
1116 vai.endness = VexEndnessLE;
1117 vai.hwcaps = (have_sse3 ? VEX_HWCAPS_AMD64_SSE3 : 0)
1118 | (have_ssse3 ? VEX_HWCAPS_AMD64_SSSE3 : 0)
1119 | (have_cx16 ? VEX_HWCAPS_AMD64_CX16 : 0)
1120 | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0)
1121 | (have_avx ? VEX_HWCAPS_AMD64_AVX : 0)
1122 | (have_bmi ? VEX_HWCAPS_AMD64_BMI : 0)
1123 | (have_avx2 ? VEX_HWCAPS_AMD64_AVX2 : 0)
1124 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0)
1125 | (have_f16c ? VEX_HWCAPS_AMD64_F16C : 0)
1126 | (have_rdrand ? VEX_HWCAPS_AMD64_RDRAND : 0)
1127 | (have_rdseed ? VEX_HWCAPS_AMD64_RDSEED : 0)
1128 | (have_fma3 ? VEX_HWCAPS_AMD64_FMA3 : 0)
1129 | (have_fma4 ? VEX_HWCAPS_AMD64_FMA4 : 0);
1131 VG_(machine_get_cache_info)(&vai);
1133 return True;
1136 #elif defined(VGA_ppc32)
1138 /* Find out which subset of the ppc32 instruction set is supported by
1139 verifying whether various ppc32 instructions generate a SIGILL
1140 or a SIGFPE. An alternative approach is to check the AT_HWCAP and
1141 AT_PLATFORM entries in the ELF auxiliary table -- see also
1142 the_iifii.client_auxv in m_main.c.
1144 vki_sigset_t saved_set, tmp_set;
1145 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1146 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
1148 volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
1149 volatile Bool have_isa_2_07, have_isa_3_0;
1150 Int r;
1152 /* This is a kludge. Really we ought to back-convert saved_act
1153 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1154 since that's a no-op on all ppc32 platforms so far supported,
1155 it's not worth the typing effort. At least include most basic
1156 sanity check: */
1157 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1159 VG_(sigemptyset)(&tmp_set);
1160 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1161 VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1163 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1164 vg_assert(r == 0);
1166 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1167 vg_assert(r == 0);
1168 tmp_sigill_act = saved_sigill_act;
1170 r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1171 vg_assert(r == 0);
1172 tmp_sigfpe_act = saved_sigfpe_act;
1174 /* NODEFER: signal handler does not return (from the kernel's point of
1175 view), hence if it is to successfully catch a signal more than once,
1176 we need the NODEFER flag. */
1177 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1178 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1179 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1180 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1181 r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1182 vg_assert(r == 0);
1184 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1185 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1186 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER;
1187 tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1188 r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1189 vg_assert(r == 0);
1191 /* standard FP insns */
1192 have_F = True;
1193 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1194 have_F = False;
1195 } else {
1196 __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
1199 /* Altivec insns */
1200 have_V = True;
1201 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1202 have_V = False;
1203 } else {
1204 /* Unfortunately some older assemblers don't speak Altivec (or
1205 choose not to), so to be safe we directly emit the 32-bit
1206 word corresponding to "vor 0,0,0". This fixes a build
1207 problem that happens on Debian 3.1 (ppc32), and probably
1208 various other places. */
1209 __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
1212 /* General-Purpose optional (fsqrt, fsqrts) */
1213 have_FX = True;
1214 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1215 have_FX = False;
1216 } else {
1217 __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
1220 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
1221 have_GX = True;
1222 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1223 have_GX = False;
1224 } else {
1225 __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
1228 /* VSX support implies Power ISA 2.06 */
1229 have_VX = True;
1230 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1231 have_VX = False;
1232 } else {
1233 __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
1236 /* Check for Decimal Floating Point (DFP) support. */
1237 have_DFP = True;
1238 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1239 have_DFP = False;
1240 } else {
1241 __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
1244 /* Check for ISA 2.07 support. */
1245 have_isa_2_07 = True;
1246 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1247 have_isa_2_07 = False;
1248 } else {
1249 __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
1252 /* Check for ISA 3.0 support. */
1253 have_isa_3_0 = True;
1254 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1255 have_isa_3_0 = False;
1256 } else {
1257 __asm__ __volatile__(".long 0x7f140434":::"r20"); /* cnttzw r20,r24 */
1260 // ISA 3.1 not supported on 32-bit systems
1262 // scv instruction not supported on 32-bit systems.
1264 /* determine dcbz/dcbzl sizes while we still have the signal
1265 * handlers registered */
1266 find_ppc_dcbz_sz(&vai);
1268 r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1269 vg_assert(r == 0);
1270 r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
1271 vg_assert(r == 0);
1272 r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1273 vg_assert(r == 0);
1274 VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
1275 (Int)have_F, (Int)have_V, (Int)have_FX,
1276 (Int)have_GX, (Int)have_VX, (Int)have_DFP,
1277 (Int)have_isa_2_07, (Int)have_isa_3_0);
1278 /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
1279 if (have_V && !have_F)
1280 have_V = False;
1281 if (have_FX && !have_F)
1282 have_FX = False;
1283 if (have_GX && !have_F)
1284 have_GX = False;
1286 VG_(machine_ppc32_has_FP) = have_F ? 1 : 0;
1287 VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;
1289 va = VexArchPPC32;
1290 vai.endness = VexEndnessBE;
1292 vai.hwcaps = 0;
1293 if (have_F) vai.hwcaps |= VEX_HWCAPS_PPC32_F;
1294 if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC32_V;
1295 if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
1296 if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
1297 if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
1298 if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
1299 if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;
1300 if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA3_0;
1301 /* ISA 3.1 not supported on 32-bit systems. */
1302 /* SCV not supported on PPC32 */
1304 VG_(machine_get_cache_info)(&vai);
1306 /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
1307 called before we're ready to go. */
1308 return True;
1311 #elif defined(VGA_ppc64be)|| defined(VGA_ppc64le)
1313 /* Same instruction set detection algorithm as for ppc32. */
1314 vki_sigset_t saved_set, tmp_set;
1315 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1316 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
1318 volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
1319 volatile Bool have_isa_2_07, have_isa_3_0, have_isa_3_1;
1320 Int r;
1322 /* This is a kludge. Really we ought to back-convert saved_act
1323 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1324 since that's a no-op on all ppc64 platforms so far supported,
1325 it's not worth the typing effort. At least include most basic
1326 sanity check: */
1327 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1329 VG_(sigemptyset)(&tmp_set);
1330 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1331 VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1333 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1334 vg_assert(r == 0);
1336 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1337 vg_assert(r == 0);
1338 tmp_sigill_act = saved_sigill_act;
1340 VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1341 tmp_sigfpe_act = saved_sigfpe_act;
1343 /* NODEFER: signal handler does not return (from the kernel's point of
1344 view), hence if it is to successfully catch a signal more than once,
1345 we need the NODEFER flag. */
1346 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1347 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1348 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1349 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1350 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1352 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1353 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1354 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER;
1355 tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1356 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1358 /* standard FP insns */
1359 have_F = True;
1360 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1361 have_F = False;
1362 } else {
1363 __asm__ __volatile__("fmr 0,0");
1366 /* Altivec insns */
1367 have_V = True;
1368 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1369 have_V = False;
1370 } else {
1371 __asm__ __volatile__(".long 0x10000484"); /* vor v0,v0,v0 */
1374 /* General-Purpose optional (fsqrt, fsqrts) */
1375 have_FX = True;
1376 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1377 have_FX = False;
1378 } else {
1379 __asm__ __volatile__(".long 0xFC00002C"); /* fsqrt f0,f0 */
1382 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
1383 have_GX = True;
1384 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1385 have_GX = False;
1386 } else {
1387 __asm__ __volatile__(".long 0xFC000034"); /* frsqrte f0,f0 */
1390 /* VSX support implies Power ISA 2.06 */
1391 have_VX = True;
1392 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1393 have_VX = False;
1394 } else {
1395 __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp vs0,vs0 */
1398 /* Check for Decimal Floating Point (DFP) support. */
1399 have_DFP = True;
1400 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1401 have_DFP = False;
1402 } else {
1403 __asm__ __volatile__(".long 0xec0e8005"); /* dadd f0,f14,f16 */
1406 /* Check for ISA 2.07 support. */
1407 have_isa_2_07 = True;
1408 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1409 have_isa_2_07 = False;
1410 } else {
1411 __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd f0,r0 */
1414 /* Check for ISA 3.0 support. */
1415 have_isa_3_0 = True;
1416 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1417 have_isa_3_0 = False;
1418 } else {
1419 __asm__ __volatile__(".long 0x7f140434":::"r20"); /* cnttzw r20,r24 */
1422 /* Check if Host supports scv instruction.
1423 Note, can not use the usual method of issuing the scv instruction and
1424 checking if it is supported or not. Issuing scv on a system that does
1425 not have scv support in the HWCAPS generates a message in dmesg,
1426 "Facility 'SCV' unavailable (12), exception". It is considered bad
1427 form to issue an scv on systems that do not support it.
1429 The function VG_(machine_ppc64_set_scv_support), is called in
1430 initimg-linux.c to set the flag ppc_scv_supported based on HWCAPS2
1431 value. The flag ppc_scv_supported is defined struct VexArchInfo,
1432 in file libvex.h The setting of ppc_scv_supported in VexArchInfo
1433 is checked in disInstr_PPC_WRK() to set the allow_scv flag. */
1435 /* Check for ISA 3.1 support. */
1436 have_isa_3_1 = True;
1437 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1438 have_isa_3_1 = False;
1439 } else {
1440 __asm__ __volatile__(".long 0x7f1401b6":::"r20"); /* brh r20,r24 */
1443 /* determine dcbz/dcbzl sizes while we still have the signal
1444 * handlers registered */
1445 find_ppc_dcbz_sz(&vai);
1447 VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1448 VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
1449 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1450 VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d ISA3.1 %d\n",
1451 (Int)have_F, (Int)have_V, (Int)have_FX,
1452 (Int)have_GX, (Int)have_VX, (Int)have_DFP,
1453 (Int)have_isa_2_07, (int)have_isa_3_0, (int)have_isa_3_1);
1454 /* on ppc64be, if we don't even have FP, just give up. */
1455 if (!have_F)
1456 return False;
1458 VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;
1460 va = VexArchPPC64;
1461 # if defined(VKI_LITTLE_ENDIAN)
1462 vai.endness = VexEndnessLE;
1463 # elif defined(VKI_BIG_ENDIAN)
1464 vai.endness = VexEndnessBE;
1465 # else
1466 vai.endness = VexEndness_INVALID;
1467 # endif
1469 vai.hwcaps = 0;
1470 if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC64_V;
1471 if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
1472 if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
1473 if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
1474 if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
1475 if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;
1476 if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_0;
1477 if (have_isa_3_1) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_1;
1479 VG_(machine_get_cache_info)(&vai);
1481 /* But we're not done yet: VG_(machine_ppc64_set_clszB) and
1482 VG_(machine_ppc64_set_scv_support) must be called before we're
1483 ready to go. */
1484 return True;
1487 #elif defined(VGA_s390x)
1489 # include "libvex_s390x_common.h"
1492 /* Instruction set detection code borrowed from ppc above. */
1493 vki_sigset_t saved_set, tmp_set;
1494 vki_sigaction_fromK_t saved_sigill_act;
1495 vki_sigaction_toK_t tmp_sigill_act;
1497 volatile Bool have_LDISP, have_STFLE;
1498 Int i, r, model;
1500 /* If the model is "unknown" don't treat this as an error. Assume
1501 this is a brand-new machine model for which we don't have the
1502 identification yet. Keeping fingers crossed. */
1503 model = VG_(get_machine_model)();
1505 /* Unblock SIGILL and stash away the old action for that signal */
1506 VG_(sigemptyset)(&tmp_set);
1507 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1509 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1510 vg_assert(r == 0);
1512 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1513 vg_assert(r == 0);
1514 tmp_sigill_act = saved_sigill_act;
1516 /* NODEFER: signal handler does not return (from the kernel's point of
1517 view), hence if it is to successfully catch a signal more than once,
1518 we need the NODEFER flag. */
1519 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1520 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1521 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1522 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1523 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1525 /* Determine hwcaps. Note, we cannot use the stfle insn because it
1526 is not supported on z900. */
1528 have_LDISP = True;
1529 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1530 have_LDISP = False;
1531 } else {
1532 /* BASR loads the address of the next insn into r1. Needed to avoid
1533 a segfault in XY. */
1534 __asm__ __volatile__("basr %%r1,%%r0\n\t"
1535 ".long 0xe3001000\n\t" /* XY 0,0(%r1) */
1536 ".short 0x0057" : : : "r0", "r1", "cc", "memory");
1539 /* Check availability of STFLE. If available store facility bits
1540 in hoststfle. */
1541 ULong hoststfle[S390_NUM_FACILITY_DW];
1543 for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
1544 hoststfle[i] = 0;
1546 have_STFLE = True;
1547 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1548 have_STFLE = False;
1549 } else {
1550 register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;
1552 __asm__(".insn s,0xb2b00000,%0" /* stfle */
1553 : "=Q"(hoststfle), "+d"(reg0)
1555 : "cc");
1558 /* Restore signals */
1559 r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1560 vg_assert(r == 0);
1561 r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1562 vg_assert(r == 0);
1563 va = VexArchS390X;
1564 vai.endness = VexEndnessBE;
1566 vai.hwcaps = model;
1567 if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
1568 if (have_LDISP) {
1569 /* Use long displacement only on machines >= z990. For all other
1570 machines it is millicoded and therefore slow. */
1571 if (model >= VEX_S390X_MODEL_Z990)
1572 vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
1575 /* Detect presence of certain facilities using the STFLE insn.
1576 Note, that these facilities were introduced at the same time or later
1577 as STFLE, so the absence of STFLE implies the absence of the facility
1578 we're trying to detect. */
1579 struct fac_hwcaps_map {
1580 UInt installed;
1581 UInt facility_bit;
1582 UInt hwcaps_bit;
1583 const HChar name[6]; // may need adjustment for new facility names
1584 } fac_hwcaps[] = {
1585 { False, S390_FAC_EIMM, VEX_HWCAPS_S390X_EIMM, "EIMM" },
1586 { False, S390_FAC_GIE, VEX_HWCAPS_S390X_GIE, "GIE" },
1587 { False, S390_FAC_DFP, VEX_HWCAPS_S390X_DFP, "DFP" },
1588 { False, S390_FAC_FPSE, VEX_HWCAPS_S390X_FGX, "FGX" },
1589 { False, S390_FAC_ETF2, VEX_HWCAPS_S390X_ETF2, "ETF2" },
1590 { False, S390_FAC_ETF3, VEX_HWCAPS_S390X_ETF3, "ETF3" },
1591 { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
1592 { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
1593 { False, S390_FAC_LSC, VEX_HWCAPS_S390X_LSC, "LSC" },
1594 { False, S390_FAC_PFPO, VEX_HWCAPS_S390X_PFPO, "PFPO" },
1595 { False, S390_FAC_VX, VEX_HWCAPS_S390X_VX, "VX" },
1596 { False, S390_FAC_MSA5, VEX_HWCAPS_S390X_MSA5, "MSA5" },
1597 { False, S390_FAC_MI2, VEX_HWCAPS_S390X_MI2, "MI2" },
1598 { False, S390_FAC_LSC2, VEX_HWCAPS_S390X_LSC2, "LSC2" },
1599 { False, S390_FAC_VXE, VEX_HWCAPS_S390X_VXE, "VXE" },
1600 { False, S390_FAC_DFLT, VEX_HWCAPS_S390X_DFLT, "DFLT" },
1601 { False, S390_FAC_NNPA, VEX_HWCAPS_S390X_NNPA, "NNPA" },
1604 /* Set hwcaps according to the detected facilities */
1605 UChar dw_number = 0;
1606 UChar fac_bit = 0;
1607 for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
1608 vg_assert(fac_hwcaps[i].facility_bit <= 191); // for now
1609 dw_number = fac_hwcaps[i].facility_bit / 64;
1610 fac_bit = fac_hwcaps[i].facility_bit % 64;
1611 if (hoststfle[dw_number] & (1ULL << (63 - fac_bit))) {
1612 fac_hwcaps[i].installed = True;
1613 vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
1617 /* Build up a string showing the probed-for facilities */
1618 HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
1619 (sizeof fac_hwcaps[0].name + 3) + // %s %d
1620 7 + 1 + 4 + 2 // machine %4d
1621 + 1]; // \0
1622 HChar *p = fac_str;
1623 p += VG_(sprintf)(p, "machine %4d ", model);
1624 for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
1625 p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name,
1626 fac_hwcaps[i].installed);
1628 *p++ = '\0';
1630 VG_(debugLog)(1, "machine", "%s\n", fac_str);
1631 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
1633 VG_(machine_get_cache_info)(&vai);
1635 return True;
1638 #elif defined(VGA_arm)
1640 /* Same instruction set detection algorithm as for ppc32. */
1641 vki_sigset_t saved_set, tmp_set;
1642 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1643 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
1645 volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON, have_V8;
1646 volatile Int archlevel;
1647 Int r;
1649 /* This is a kludge. Really we ought to back-convert saved_act
1650 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1651 since that's a no-op on all ppc64 platforms so far supported,
1652 it's not worth the typing effort. At least include most basic
1653 sanity check: */
1654 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1656 VG_(sigemptyset)(&tmp_set);
1657 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1658 VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1660 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1661 vg_assert(r == 0);
1663 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1664 vg_assert(r == 0);
1665 tmp_sigill_act = saved_sigill_act;
1667 VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1668 tmp_sigfpe_act = saved_sigfpe_act;
1670 /* NODEFER: signal handler does not return (from the kernel's point of
1671 view), hence if it is to successfully catch a signal more than once,
1672 we need the NODEFER flag. */
1673 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1674 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1675 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1676 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1677 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1679 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1680 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1681 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER;
1682 tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1683 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1685 /* VFP insns */
1686 have_VFP = True;
1687 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1688 have_VFP = False;
1689 } else {
1690 __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
1692 /* There are several generations of the VFP extension but they differ very
1693 little so for now we will not distinguish them. */
1694 have_VFP2 = have_VFP;
1695 have_VFP3 = have_VFP;
1697 /* NEON insns */
1698 have_NEON = True;
1699 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1700 have_NEON = False;
1701 } else {
1702 __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
1705 /* ARM architecture level */
1706 archlevel = 5; /* v5 will be base level */
1707 if (archlevel < 7) {
1708 archlevel = 7;
1709 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1710 archlevel = 5;
1711 } else {
1712 __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
1715 if (archlevel < 6) {
1716 archlevel = 6;
1717 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1718 archlevel = 5;
1719 } else {
1720 __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
1724 /* ARMv8 insns */
1725 have_V8 = True;
1726 if (archlevel == 7) {
1727 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1728 have_V8 = False;
1729 } else {
1730 __asm__ __volatile__(".word 0xF3044F54"); /* VMAXNM.F32 q2,q2,q2 */
1732 if (have_V8 && have_NEON && have_VFP3) {
1733 archlevel = 8;
1737 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
1738 VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
1739 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1740 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1741 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1743 VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
1744 archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
1745 (Int)have_NEON);
1747 VG_(machine_arm_archlevel) = archlevel;
1749 va = VexArchARM;
1750 vai.endness = VexEndnessLE;
1752 vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
1753 if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
1754 if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
1755 if (have_VFP) vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
1756 if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
1758 VG_(machine_get_cache_info)(&vai);
1760 return True;
1763 #elif defined(VGA_arm64)
1765 /* Use the attribute and feature registers to determine host hardware
1766 * capabilities. Only user-space features are read. Naming conventions
1767 * follow the Arm Architecture Reference Manual.
1769 * ID_AA64ISAR0_EL1 Instruction Set Attribute Register 0
1770 * ----------------
1771 * ...5544 4444 4444 3333 3333 3332 2222 2222 1111 1111 11
1772 * ...1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
1773 * FHM DP SM4 SM3 SHA3 RDM ATOMICS
1775 * ID_AA64ISAR1_EL1 Instruction Set Attribute Register 1
1776 * ----------------
1777 * ...5555 5544 4444 4444 3333 3333 3332 2222 2222 1111 1111 11
1778 * ...5432 1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
1779 * ...I8MM BF16 DPB
1781 * ID_AA64PFR0_EL1 Processor Feature Register 0
1782 * ---------------
1783 * 6666...2222 2222 1111 1111 11
1784 * 3210...7654 3210 9876 5432 1098 7654 3210
1785 * ASIMD FP16
1788 Bool is_base_v8 = False;
1790 Bool have_fhm, have_dp, have_sm4, have_sm3, have_sha3, have_rdm;
1791 Bool have_atomics, have_i8mm, have_bf16, have_dpbcvap, have_dpbcvadp;
1792 Bool have_vfp16, have_fp16;
1794 have_fhm = have_dp = have_sm4 = have_sm3 = have_sha3 = have_rdm
1795 = have_atomics = have_i8mm = have_bf16 = have_dpbcvap
1796 = have_dpbcvadp = have_vfp16 = have_fp16 = False;
1798 /* Some baseline v8.0 kernels do not allow reads of these registers. Use
1799 * the same SIGILL handling algorithm as other architectures for such
1800 * kernels.
1802 vki_sigset_t saved_set, tmp_set;
1803 vki_sigaction_fromK_t saved_sigill_act;
1804 vki_sigaction_toK_t tmp_sigill_act;
1806 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1808 VG_(sigemptyset)(&tmp_set);
1809 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1811 Int r;
1813 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1814 vg_assert(r == 0);
1816 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1817 vg_assert(r == 0);
1818 tmp_sigill_act = saved_sigill_act;
1820 /* NODEFER: signal handler does not return (from the kernel's point of
1821 view), hence if it is to successfully catch a signal more than once,
1822 we need the NODEFER flag. */
1823 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1824 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1825 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1826 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1827 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1829 /* Does reading ID_AA64ISAR0_EL1 register throw SIGILL on base v8.0? */
1830 if (VG_MINIMAL_SETJMP(env_unsup_insn))
1831 is_base_v8 = True;
1832 else
1833 __asm__ __volatile__("mrs x0, ID_AA64ISAR0_EL1");
1835 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
1836 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1837 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1839 va = VexArchARM64;
1840 vai.endness = VexEndnessLE;
1842 /* Baseline features are v8.0. */
1843 vai.hwcaps = 0;
1845 VG_(machine_get_cache_info)(&vai);
1847 // @todo PJF ARM64 if we need this then we can't parse anything in /proc
1848 #if !defined(VGP_arm64_freebsd)
1849 /* Check whether we need to use the fallback LLSC implementation.
1850 If the check fails, give up. */
1851 if (! VG_(parse_cpuinfo)())
1852 return False;
1853 #endif
1855 /* 0 denotes 'not set'. The range of legitimate values here,
1856 after being set that is, is 2 through 17 inclusive. */
1857 vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
1858 vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
1859 ULong ctr_el0;
1860 __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
1861 vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
1862 vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >> 0) & 0xF) + 2;
1863 VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
1864 "ctr_el0.iMinLine_szB = %d\n",
1865 1 << vai.arm64_dMinLine_lg2_szB,
1866 1 << vai.arm64_iMinLine_lg2_szB);
1867 VG_(debugLog)(1, "machine", "ARM64: requires_fallback_LLSC: %s\n",
1868 vai.arm64_requires_fallback_LLSC ? "yes" : "no");
1870 if (is_base_v8)
1871 return True;
1873 /* ID_AA64ISAR0_EL1 Instruction set attribute register 0 fields */
1874 #define ID_AA64ISAR0_FHM_SHIFT 48
1875 #define ID_AA64ISAR0_DP_SHIFT 44
1876 #define ID_AA64ISAR0_SM4_SHIFT 40
1877 #define ID_AA64ISAR0_SM3_SHIFT 36
1878 #define ID_AA64ISAR0_SHA3_SHIFT 32
1879 #define ID_AA64ISAR0_RDM_SHIFT 28
1880 #define ID_AA64ISAR0_ATOMICS_SHIFT 20
1881 /* Field values */
1882 #define ID_AA64ISAR0_FHM_SUPPORTED 0x1
1883 #define ID_AA64ISAR0_DP_SUPPORTED 0x1
1884 #define ID_AA64ISAR0_SM4_SUPPORTED 0x1
1885 #define ID_AA64ISAR0_SM3_SUPPORTED 0x1
1886 #define ID_AA64ISAR0_SHA3_SUPPORTED 0x1
1887 #define ID_AA64ISAR0_RDM_SUPPORTED 0x1
1888 #define ID_AA64ISAR0_ATOMICS_SUPPORTED 0x2
1890 /* ID_AA64ISAR1_EL1 Instruction set attribute register 1 fields */
1891 #define ID_AA64ISAR1_I8MM_SHIFT 52
1892 #define ID_AA64ISAR1_BF16_SHIFT 44
1893 #define ID_AA64ISAR1_DPB_SHIFT 0
1894 /* Field values */
1895 #define ID_AA64ISAR1_I8MM_SUPPORTED 0x1
1896 #define ID_AA64ISAR1_BF16_SUPPORTED 0x1
1897 #define ID_AA64ISAR1_DPBCVAP_SUPPORTED 0x1
1898 #define ID_AA64ISAR1_DPBCVADP_SUPPORTED 0x2
1900 /* ID_AA64PFR0_EL1 Processor feature register 0 fields */
1901 #define ID_AA64PFR0_VFP16_SHIFT 20
1902 #define ID_AA64PFR0_FP16_SHIFT 16
1903 /* Field values */
1904 #define ID_AA64PFR0_VFP16_SUPPORTED 0x1
1905 #define ID_AA64PFR0_FP16_SUPPORTED 0x1
1907 #define get_cpu_ftr(id) ({ \
1908 unsigned long val; \
1909 asm("mrs %0, "#id : "=r" (val)); \
1910 VG_(debugLog)(1, "machine", "ARM64: %-20s: 0x%016lx\n", #id, val); \
1912 get_cpu_ftr(ID_AA64ISAR0_EL1);
1913 get_cpu_ftr(ID_AA64ISAR1_EL1);
1914 get_cpu_ftr(ID_AA64PFR0_EL1);
1916 #define get_ftr(id, ftr, fval, have_ftr) ({ \
1917 unsigned long rval; \
1918 asm("mrs %0, "#id : "=r" (rval)); \
1919 have_ftr = (fval & ((rval >> ftr) & 0xf)) >= fval ? True : False; \
1922 /* Read ID_AA64ISAR0_EL1 attributes */
1924 /* FHM indicates support for FMLAL and FMLSL instructions.
1925 * Optional for v8.2.
1927 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT,
1928 ID_AA64ISAR0_FHM_SUPPORTED, have_fhm);
1930 /* DP indicates support for UDOT and SDOT instructions.
1931 * Optional for v8.2.
1933 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT,
1934 ID_AA64ISAR0_DP_SUPPORTED, have_dp);
1936 /* SM4 indicates support for SM4E and SM4EKEY instructions.
1937 * Optional for v8.2.
1939 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT,
1940 ID_AA64ISAR0_SM4_SUPPORTED, have_sm4);
1942 /* SM3 indicates support for SM3SS1, SM3TT1A, SM3TT1B, SM3TT2A, SM3TT2B,
1943 * SM3PARTW1, and SM3PARTW2 instructions.
1944 * Optional for v8.2.
1946 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT,
1947 ID_AA64ISAR0_SM3_SUPPORTED, have_sm3);
1949 /* SHA3 indicates support for EOR3, RAX1, XAR, and BCAX instructions.
1950 * Optional for v8.2.
1952 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT,
1953 ID_AA64ISAR0_SHA3_SUPPORTED, have_sha3);
1955 /* RDM indicates support for SQRDMLAH and SQRDMLSH instructions.
1956 * Mandatory from v8.1 onwards.
1958 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT,
1959 ID_AA64ISAR0_RDM_SUPPORTED, have_rdm);
1961 /* v8.1 ATOMICS indicates support for LDADD, LDCLR, LDEOR, LDSET, LDSMAX,
1962 * LDSMIN, LDUMAX, LDUMIN, CAS, CASP, and SWP instructions.
1963 * Mandatory from v8.1 onwards.
1965 get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT,
1966 ID_AA64ISAR0_ATOMICS_SUPPORTED, have_atomics);
1968 /* Read ID_AA64ISAR1_EL1 attributes */
1970 /* I8MM indicates support for SMMLA, SUDOT, UMMLA, USMMLA, and USDOT
1971 * instructions.
1972 * Optional for v8.2.
1974 get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT,
1975 ID_AA64ISAR1_I8MM_SUPPORTED, have_i8mm);
1977 /* BF16 indicates support for BFDOT, BFMLAL, BFMLAL2, BFMMLA, BFCVT, and
1978 * BFCVT2 instructions.
1979 * Optional for v8.2.
1981 get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT,
1982 ID_AA64ISAR1_BF16_SUPPORTED, have_bf16);
1984 /* DPB indicates support for DC CVAP instruction.
1985 * Mandatory for v8.2 onwards.
1987 get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT,
1988 ID_AA64ISAR1_DPBCVAP_SUPPORTED, have_dpbcvap);
1990 /* DPB indicates support for DC CVADP instruction.
1991 * Optional for v8.2.
1993 get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT,
1994 ID_AA64ISAR1_DPBCVADP_SUPPORTED, have_dpbcvadp);
1996 /* Read ID_AA64PFR0_EL1 attributes */
1998 /* VFP16 indicates support for half-precision vector arithmetic.
1999 * Optional for v8.2. Must be the same value as FP16.
2001 get_ftr(ID_AA64PFR0_EL1, ID_AA64PFR0_VFP16_SHIFT,
2002 ID_AA64PFR0_VFP16_SUPPORTED, have_vfp16);
2004 /* FP16 indicates support for half-precision scalar arithmetic.
2005 * Optional for v8.2. Must be the same value as VFP16.
2007 get_ftr(ID_AA64PFR0_EL1, ID_AA64PFR0_FP16_SHIFT,
2008 ID_AA64PFR0_FP16_SUPPORTED, have_fp16);
2010 if (have_fhm) vai.hwcaps |= VEX_HWCAPS_ARM64_FHM;
2011 if (have_dpbcvap) vai.hwcaps |= VEX_HWCAPS_ARM64_DPBCVAP;
2012 if (have_dpbcvadp) vai.hwcaps |= VEX_HWCAPS_ARM64_DPBCVADP;
2013 if (have_sm3) vai.hwcaps |= VEX_HWCAPS_ARM64_SM3;
2014 if (have_sm4) vai.hwcaps |= VEX_HWCAPS_ARM64_SM4;
2015 if (have_sha3) vai.hwcaps |= VEX_HWCAPS_ARM64_SHA3;
2016 if (have_rdm) vai.hwcaps |= VEX_HWCAPS_ARM64_RDM;
2017 if (have_i8mm) vai.hwcaps |= VEX_HWCAPS_ARM64_I8MM;
2018 if (have_atomics) vai.hwcaps |= VEX_HWCAPS_ARM64_ATOMICS;
2019 if (have_bf16) vai.hwcaps |= VEX_HWCAPS_ARM64_BF16;
2020 if (have_fp16) vai.hwcaps |= VEX_HWCAPS_ARM64_FP16;
2021 if (have_vfp16) vai.hwcaps |= VEX_HWCAPS_ARM64_VFP16;
2023 #undef get_cpu_ftr
2024 #undef get_ftr
2026 return True;
2029 #elif defined(VGA_mips32)
2031 /* Define the position of F64 bit in FIR register. */
2032 # define FP64 22
2033 va = VexArchMIPS32;
2034 if (!VG_(parse_cpuinfo)())
2035 return False;
2037 # if defined(VKI_LITTLE_ENDIAN)
2038 vai.endness = VexEndnessLE;
2039 # elif defined(VKI_BIG_ENDIAN)
2040 vai.endness = VexEndnessBE;
2041 # else
2042 vai.endness = VexEndness_INVALID;
2043 # endif
2045 /* Same instruction set detection algorithm as for ppc32/arm... */
2046 vki_sigset_t saved_set, tmp_set;
2047 vki_sigaction_fromK_t saved_sigill_act;
2048 vki_sigaction_toK_t tmp_sigill_act;
2050 volatile Bool have_DSP, have_DSPr2, have_MSA;
2051 Int r;
2053 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
2055 VG_(sigemptyset)(&tmp_set);
2056 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
2058 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
2059 vg_assert(r == 0);
2061 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
2062 vg_assert(r == 0);
2063 tmp_sigill_act = saved_sigill_act;
2065 /* NODEFER: signal handler does not return (from the kernel's point of
2066 view), hence if it is to successfully catch a signal more than once,
2067 we need the NODEFER flag. */
2068 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
2069 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
2070 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
2071 tmp_sigill_act.ksa_handler = handler_unsup_insn;
2072 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
2074 if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {
2076 /* MSA instructions. */
2077 have_MSA = True;
2078 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
2079 have_MSA = False;
2080 } else {
2081 __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
2083 if (have_MSA) {
2084 vai.hwcaps |= VEX_PRID_IMP_P5600;
2085 } else {
2086 /* DSPr2 instructions. */
2087 have_DSPr2 = True;
2088 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
2089 have_DSPr2 = False;
2090 } else {
2091 __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
2093 if (have_DSPr2) {
2094 /* We assume it's 74K, since it can run DSPr2. */
2095 vai.hwcaps |= VEX_PRID_IMP_74K;
2096 } else {
2097 /* DSP instructions. */
2098 have_DSP = True;
2099 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
2100 have_DSP = False;
2101 } else {
2102 __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
2104 if (have_DSP) {
2105 /* We assume it's 34K, since it has support for DSP. */
2106 vai.hwcaps |= VEX_PRID_IMP_34K;
2112 # if defined(VGP_mips32_linux)
2113 Int fpmode = VG_(prctl)(VKI_PR_GET_FP_MODE, 0, 0, 0, 0);
2114 # else
2115 Int fpmode = -1;
2116 # endif
2118 if (fpmode < 0) {
2119 /* prctl(PR_GET_FP_MODE) is not supported by Kernel,
2120 we are using alternative way to determine FP mode */
2121 ULong result = 0;
2123 if (!VG_MINIMAL_SETJMP(env_unsup_insn)) {
2124 __asm__ volatile (
2125 ".set push\n\t"
2126 ".set noreorder\n\t"
2127 ".set oddspreg\n\t"
2128 ".set hardfloat\n\t"
2129 "lui $t0, 0x3FF0\n\t"
2130 "ldc1 $f0, %0\n\t"
2131 "mtc1 $t0, $f1\n\t"
2132 "sdc1 $f0, %0\n\t"
2133 ".set pop\n\t"
2134 : "+m"(result)
2136 : "t0", "$f0", "$f1", "memory");
2138 fpmode = (result != 0x3FF0000000000000ull);
2142 if (fpmode != 0)
2143 vai.hwcaps |= VEX_MIPS_HOST_FR;
2145 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
2146 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
2147 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
2149 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
2150 VG_(machine_get_cache_info)(&vai);
2152 return True;
2155 #elif defined(VGA_mips64)
2157 va = VexArchMIPS64;
2158 if (!VG_(parse_cpuinfo)())
2159 return False;
2161 # if defined(VKI_LITTLE_ENDIAN)
2162 vai.endness = VexEndnessLE;
2163 # elif defined(VKI_BIG_ENDIAN)
2164 vai.endness = VexEndnessBE;
2165 # else
2166 vai.endness = VexEndness_INVALID;
2167 # endif
2169 vai.hwcaps |= VEX_MIPS_HOST_FR;
2171 /* Same instruction set detection algorithm as for ppc32/arm... */
2172 vki_sigset_t saved_set, tmp_set;
2173 vki_sigaction_fromK_t saved_sigill_act;
2174 vki_sigaction_toK_t tmp_sigill_act;
2176 volatile Bool have_MSA;
2177 Int r;
2179 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
2181 VG_(sigemptyset)(&tmp_set);
2182 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
2184 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
2185 vg_assert(r == 0);
2187 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
2188 vg_assert(r == 0);
2189 tmp_sigill_act = saved_sigill_act;
2191 /* NODEFER: signal handler does not return (from the kernel's point of
2192 view), hence if it is to successfully catch a signal more than once,
2193 we need the NODEFER flag. */
2194 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
2195 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
2196 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
2197 tmp_sigill_act.ksa_handler = handler_unsup_insn;
2198 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
2200 if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {
2202 /* MSA instructions */
2203 have_MSA = True;
2204 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
2205 have_MSA = False;
2206 } else {
2207 __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
2209 if (have_MSA) {
2210 vai.hwcaps |= VEX_PRID_IMP_P5600;
2214 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
2215 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
2216 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
2218 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
2220 VG_(machine_get_cache_info)(&vai);
2222 return True;
2225 #elif defined(VGP_nanomips_linux)
2227 va = VexArchNANOMIPS;
2228 vai.hwcaps = 0;
2230 # if defined(VKI_LITTLE_ENDIAN)
2231 vai.endness = VexEndnessLE;
2232 # elif defined(VKI_BIG_ENDIAN)
2233 vai.endness = VexEndnessBE;
2234 # else
2235 vai.endness = VexEndness_INVALID;
2236 # endif
2238 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
2240 VG_(machine_get_cache_info)(&vai);
2242 return True;
2244 #else
2245 # error "Unknown arch"
2246 #endif
/* Record the host cpu's instruction cache line size (ppc32 builds only). */
#ifdef VGA_ppc32
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   /* Must not be called before hwcaps detection has finished. */
   vg_assert(hwcaps_done);

   /* The stored line size is either still unset (zero) or is being set
      again to the value it already holds.  Repeated identical
      notifications are tolerated because the stack scanning logic in
      m_main is not very clever about reporting it only once. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   /* Only these four line sizes are accepted. */
   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);

   vai.ppc_icache_line_szB = szB;
}
#endif /* VGA_ppc32 */
/* ppc64 (big- and little-endian) notification hooks. */
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)

/* Record the host cpu's instruction cache line size. */
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   /* Must not be called before hwcaps detection has finished. */
   vg_assert(hwcaps_done);

   /* The stored line size is either still unset (zero) or is being set
      again to the value it already holds.  Repeated identical
      notifications are tolerated because the stack scanning logic in
      m_main is not very clever about reporting it only once. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   /* Only these four line sizes are accepted. */
   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);

   vai.ppc_icache_line_szB = szB;
}

/* Store the caller-supplied scv-support flag in the host arch info. */
void VG_(machine_ppc64_set_scv_support)( Int is_supported )
{
   /* Must not be called before hwcaps detection has finished. */
   vg_assert(hwcaps_done);

   vai.ppc_scv_supported = is_supported;
}

#endif /* VGA_ppc64be || VGA_ppc64le */
/* Record whether the host can execute NEON instructions (arm builds
   only). */
#ifdef VGA_arm
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   /* Must not be called before hwcaps detection has finished.  Beyond
      that there is nothing else we can sanity check. */
   vg_assert(hwcaps_done);

   if (!has_neon) {
      /* Drop the NEON capability bit, leaving all other bits alone. */
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
      return;
   }

   /* Advertise the NEON capability bit. */
   vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
}
#endif /* VGA_arm */
2308 /* Fetch host cpu info, once established. */
2309 void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
2310 /*OUT*/VexArchInfo* pVai )
2312 vg_assert(hwcaps_done);
2313 if (pVa) *pVa = va;
2314 if (pVai) *pVai = vai;
2318 /* Returns the size of the largest guest register that we will
2319 simulate in this run. This depends on both the guest architecture
2320 and on the specific capabilities we are simulating for that guest
2321 (eg, AVX or non-AVX ?, for amd64). Should return either 4, 8, 16
2322 or 32. General rule: if in doubt, return a value larger than
2323 reality.
2325 This information is needed by Cachegrind and Callgrind to decide
2326 what the minimum cache line size they are prepared to simulate is.
2327 Basically require that the minimum cache line size is at least as
2328 large as the largest register that might get transferred to/from
2329 memory, so as to guarantee that any such transaction can straddle
2330 at most 2 cache lines.
2332 Int VG_(machine_get_size_of_largest_guest_register) ( void )
2334 vg_assert(hwcaps_done);
2335 /* Once hwcaps_done is True, we can fish around inside va/vai to
2336 find the information we need. */
2338 # if defined(VGA_x86)
2339 vg_assert(va == VexArchX86);
2340 /* We don't support AVX, so 32 is out. At the other end, even if
2341 we don't support any SSE, the X87 can generate 10 byte
2342 transfers, so let's say 16 to be on the safe side. Hence the
2343 answer is always 16. */
2344 return 16;
2346 # elif defined(VGA_amd64)
2347 /* if AVX then 32 else 16 */
2348 return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;
2350 # elif defined(VGA_ppc32)
2351 /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
2352 if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
2353 if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
2354 if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
2355 return 8;
2357 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
2358 /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
2359 if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
2360 if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
2361 if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
2362 return 8;
2364 # elif defined(VGA_s390x)
2365 return 8;
2367 # elif defined(VGA_arm)
2368 /* Really it depends whether or not we have NEON, but let's just
2369 assume we always do. */
2370 return 16;
2372 # elif defined(VGA_arm64)
2373 /* ARM64 always has Neon, AFAICS. */
2374 return 16;
2376 # elif defined(VGA_mips32) || defined(VGP_nanomips_linux)
2377 /* The guest state implies 4, but that can't really be true, can
2378 it? */
2379 return 8;
2381 # elif defined(VGA_mips64)
2382 return 8;
2384 # else
2385 # error "Unknown arch"
2386 # endif
2390 // Given a pointer to a function as obtained by "& functionname" in C,
2391 // produce a pointer to the actual entry point for the function.
2392 void* VG_(fnptr_to_fnentry)( void* f )
2394 # if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \
2395 || defined(VGP_arm_linux) || defined(VGO_darwin) || defined(VGO_freebsd) \
2396 || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
2397 || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
2398 || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
2399 || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris) \
2400 || defined(VGP_nanomips_linux)
2401 return f;
2402 # elif defined(VGP_ppc64be_linux)
2403 /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
2404 3-word function descriptor, of which the first word is the entry
2405 address. */
2406 UWord* descr = (UWord*)f;
2407 return (void*)(descr[0]);
2408 # else
2409 # error "Unknown platform"
2410 # endif
2413 /*--------------------------------------------------------------------*/
2414 /*--- end ---*/
2415 /*--------------------------------------------------------------------*/