coregrind/m_machine.c
1 /*--------------------------------------------------------------------*/
2 /*--- Machine-related stuff. m_machine.c ---*/
3 /*--------------------------------------------------------------------*/
5 /*
6 This file is part of Valgrind, a dynamic binary instrumentation
7 framework.
9 Copyright (C) 2000-2017 Julian Seward
10 jseward@acm.org
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, see <http://www.gnu.org/licenses/>.
25 The GNU General Public License is contained in the file COPYING.
28 #include "pub_core_basics.h"
29 #include "pub_core_vki.h"
30 #include "pub_core_threadstate.h"
31 #include "pub_core_libcassert.h"
32 #include "pub_core_libcbase.h"
33 #include "pub_core_libcfile.h"
34 #include "pub_core_libcprint.h"
35 #include "pub_core_libcproc.h"
36 #include "pub_core_mallocfree.h"
37 #include "pub_core_machine.h"
38 #include "pub_core_cpuid.h"
39 #include "pub_core_libcsignal.h" // for ppc32 messing with SIGILL and SIGFPE
40 #include "pub_core_debuglog.h"
43 #define INSTR_PTR(regs) ((regs).vex.VG_INSTR_PTR)
44 #define STACK_PTR(regs) ((regs).vex.VG_STACK_PTR)
45 #define FRAME_PTR(regs) ((regs).vex.VG_FRAME_PTR)
47 #define STACK_PTR_S1(regs) ((regs).vex_shadow1.VG_STACK_PTR)
49 Addr VG_(get_IP) ( ThreadId tid ) {
50 return INSTR_PTR( VG_(threads)[tid].arch );
52 Addr VG_(get_SP) ( ThreadId tid ) {
53 return STACK_PTR( VG_(threads)[tid].arch );
55 Addr VG_(get_FP) ( ThreadId tid ) {
56 return FRAME_PTR( VG_(threads)[tid].arch );
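/* The same accessors, but for the first shadow copy of the guest state;
   shadow registers hold tool-defined shadow values that track the real
   guest registers. */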
59 Addr VG_(get_SP_s1) ( ThreadId tid ) {
60 return STACK_PTR_S1( VG_(threads)[tid].arch );
62 void VG_(set_SP_s1) ( ThreadId tid, Addr sp ) {
63 STACK_PTR_S1( VG_(threads)[tid].arch ) = sp;
66 void VG_(set_IP) ( ThreadId tid, Addr ip ) {
67 INSTR_PTR( VG_(threads)[tid].arch ) = ip;
69 void VG_(set_SP) ( ThreadId tid, Addr sp ) {
70 STACK_PTR( VG_(threads)[tid].arch ) = sp;
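/* Capture the minimal register set the stack unwinder needs to start
   unwinding this thread: PC and SP, plus a few arch-specific extras such as
   the frame pointer or link register. */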
73 void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
74 ThreadId tid )
76 # if defined(VGA_x86)
77 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
78 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
79 regs->misc.X86.r_ebp
80 = VG_(threads)[tid].arch.vex.guest_EBP;
81 # elif defined(VGA_amd64)
82 regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
83 regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
84 regs->misc.AMD64.r_rbp
85 = VG_(threads)[tid].arch.vex.guest_RBP;
86 # elif defined(VGA_ppc32)
87 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
88 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
89 regs->misc.PPC32.r_lr
90 = VG_(threads)[tid].arch.vex.guest_LR;
91 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
92 regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
93 regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
94 regs->misc.PPC64.r_lr
95 = VG_(threads)[tid].arch.vex.guest_LR;
96 # elif defined(VGA_arm)
97 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
98 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
99 regs->misc.ARM.r14
100 = VG_(threads)[tid].arch.vex.guest_R14;
101 regs->misc.ARM.r12
102 = VG_(threads)[tid].arch.vex.guest_R12;
103 regs->misc.ARM.r11
104 = VG_(threads)[tid].arch.vex.guest_R11;
105 regs->misc.ARM.r7
106 = VG_(threads)[tid].arch.vex.guest_R7;
107 # elif defined(VGA_arm64)
108 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
109 regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
110 regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
111 regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
112 # elif defined(VGA_s390x)
113 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
114 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
115 regs->misc.S390X.r_fp
116 = VG_(threads)[tid].arch.vex.guest_FP;
117 regs->misc.S390X.r_lr
118 = VG_(threads)[tid].arch.vex.guest_LR;
119 /* ANDREAS 3 Apr 2019 FIXME r_f0..r_f7: is this correct? */
120 regs->misc.S390X.r_f0
121 = VG_(threads)[tid].arch.vex.guest_v0.w64[0];
122 regs->misc.S390X.r_f1
123 = VG_(threads)[tid].arch.vex.guest_v1.w64[0];
124 regs->misc.S390X.r_f2
125 = VG_(threads)[tid].arch.vex.guest_v2.w64[0];
126 regs->misc.S390X.r_f3
127 = VG_(threads)[tid].arch.vex.guest_v3.w64[0];
128 regs->misc.S390X.r_f4
129 = VG_(threads)[tid].arch.vex.guest_v4.w64[0];
130 regs->misc.S390X.r_f5
131 = VG_(threads)[tid].arch.vex.guest_v5.w64[0];
132 regs->misc.S390X.r_f6
133 = VG_(threads)[tid].arch.vex.guest_v6.w64[0];
134 regs->misc.S390X.r_f7
135 = VG_(threads)[tid].arch.vex.guest_v7.w64[0];
136 # elif defined(VGA_mips32)
137 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
138 regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
139 regs->misc.MIPS32.r30
140 = VG_(threads)[tid].arch.vex.guest_r30;
141 regs->misc.MIPS32.r31
142 = VG_(threads)[tid].arch.vex.guest_r31;
143 regs->misc.MIPS32.r28
144 = VG_(threads)[tid].arch.vex.guest_r28;
145 # elif defined(VGA_mips64)
146 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
147 regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
148 regs->misc.MIPS64.r30
149 = VG_(threads)[tid].arch.vex.guest_r30;
150 regs->misc.MIPS64.r31
151 = VG_(threads)[tid].arch.vex.guest_r31;
152 regs->misc.MIPS64.r28
153 = VG_(threads)[tid].arch.vex.guest_r28;
154 # else
155 # error "Unknown arch"
156 # endif
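/* Copy SIZE bytes starting at OFFSET out of one of the three per-thread
   register files: shadowNo 0 is the real guest state, 1 and 2 are the
   tool's shadow copies. */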
159 void
160 VG_(get_shadow_regs_area) ( ThreadId tid,
161 /*DST*/UChar* dst,
162 /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
164 void* src;
165 ThreadState* tst;
166 vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
167 vg_assert(VG_(is_valid_tid)(tid));
168 // Bounds check
169 vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
170 vg_assert(offset + size <= sizeof(VexGuestArchState));
171 // Copy
172 tst = & VG_(threads)[tid];
173 src = NULL;
174 switch (shadowNo) {
175 case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
176 case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
177 case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
179 vg_assert(src != NULL);
180 VG_(memcpy)( dst, src, size);
183 void
184 VG_(set_shadow_regs_area) ( ThreadId tid,
185 /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
186 /*SRC*/const UChar* src )
188 void* dst;
189 ThreadState* tst;
190 vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
191 vg_assert(VG_(is_valid_tid)(tid));
192 // Bounds check
193 vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
194 vg_assert(offset + size <= sizeof(VexGuestArchState));
195 // Copy
196 tst = & VG_(threads)[tid];
197 dst = NULL;
198 switch (shadowNo) {
199 case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
200 case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
201 case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
203 vg_assert(dst != NULL);
204 VG_(memcpy)( dst, src, size);
208 static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
209 const HChar*, Addr))
211 VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
212 VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
213 #if defined(VGA_x86)
214 (*f)(tid, "EAX", vex->guest_EAX);
215 (*f)(tid, "ECX", vex->guest_ECX);
216 (*f)(tid, "EDX", vex->guest_EDX);
217 (*f)(tid, "EBX", vex->guest_EBX);
218 (*f)(tid, "ESI", vex->guest_ESI);
219 (*f)(tid, "EDI", vex->guest_EDI);
220 (*f)(tid, "ESP", vex->guest_ESP);
221 (*f)(tid, "EBP", vex->guest_EBP);
222 #elif defined(VGA_amd64)
223 (*f)(tid, "RAX", vex->guest_RAX);
224 (*f)(tid, "RCX", vex->guest_RCX);
225 (*f)(tid, "RDX", vex->guest_RDX);
226 (*f)(tid, "RBX", vex->guest_RBX);
227 (*f)(tid, "RSI", vex->guest_RSI);
228 (*f)(tid, "RDI", vex->guest_RDI);
229 (*f)(tid, "RSP", vex->guest_RSP);
230 (*f)(tid, "RBP", vex->guest_RBP);
231 (*f)(tid, "R8" , vex->guest_R8 );
232 (*f)(tid, "R9" , vex->guest_R9 );
233 (*f)(tid, "R10", vex->guest_R10);
234 (*f)(tid, "R11", vex->guest_R11);
235 (*f)(tid, "R12", vex->guest_R12);
236 (*f)(tid, "R13", vex->guest_R13);
237 (*f)(tid, "R14", vex->guest_R14);
238 (*f)(tid, "R15", vex->guest_R15);
239 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
240 (*f)(tid, "GPR0" , vex->guest_GPR0 );
241 (*f)(tid, "GPR1" , vex->guest_GPR1 );
242 (*f)(tid, "GPR2" , vex->guest_GPR2 );
243 (*f)(tid, "GPR3" , vex->guest_GPR3 );
244 (*f)(tid, "GPR4" , vex->guest_GPR4 );
245 (*f)(tid, "GPR5" , vex->guest_GPR5 );
246 (*f)(tid, "GPR6" , vex->guest_GPR6 );
247 (*f)(tid, "GPR7" , vex->guest_GPR7 );
248 (*f)(tid, "GPR8" , vex->guest_GPR8 );
249 (*f)(tid, "GPR9" , vex->guest_GPR9 );
250 (*f)(tid, "GPR10", vex->guest_GPR10);
251 (*f)(tid, "GPR11", vex->guest_GPR11);
252 (*f)(tid, "GPR12", vex->guest_GPR12);
253 (*f)(tid, "GPR13", vex->guest_GPR13);
254 (*f)(tid, "GPR14", vex->guest_GPR14);
255 (*f)(tid, "GPR15", vex->guest_GPR15);
256 (*f)(tid, "GPR16", vex->guest_GPR16);
257 (*f)(tid, "GPR17", vex->guest_GPR17);
258 (*f)(tid, "GPR18", vex->guest_GPR18);
259 (*f)(tid, "GPR19", vex->guest_GPR19);
260 (*f)(tid, "GPR20", vex->guest_GPR20);
261 (*f)(tid, "GPR21", vex->guest_GPR21);
262 (*f)(tid, "GPR22", vex->guest_GPR22);
263 (*f)(tid, "GPR23", vex->guest_GPR23);
264 (*f)(tid, "GPR24", vex->guest_GPR24);
265 (*f)(tid, "GPR25", vex->guest_GPR25);
266 (*f)(tid, "GPR26", vex->guest_GPR26);
267 (*f)(tid, "GPR27", vex->guest_GPR27);
268 (*f)(tid, "GPR28", vex->guest_GPR28);
269 (*f)(tid, "GPR29", vex->guest_GPR29);
270 (*f)(tid, "GPR30", vex->guest_GPR30);
271 (*f)(tid, "GPR31", vex->guest_GPR31);
272 (*f)(tid, "CTR" , vex->guest_CTR );
273 (*f)(tid, "LR" , vex->guest_LR );
274 #elif defined(VGA_arm)
275 (*f)(tid, "R0" , vex->guest_R0 );
276 (*f)(tid, "R1" , vex->guest_R1 );
277 (*f)(tid, "R2" , vex->guest_R2 );
278 (*f)(tid, "R3" , vex->guest_R3 );
279 (*f)(tid, "R4" , vex->guest_R4 );
280 (*f)(tid, "R5" , vex->guest_R5 );
281 (*f)(tid, "R6" , vex->guest_R6 );
282 (*f)(tid, "R8" , vex->guest_R8 );
283 (*f)(tid, "R9" , vex->guest_R9 );
284 (*f)(tid, "R10", vex->guest_R10);
285 (*f)(tid, "R11", vex->guest_R11);
286 (*f)(tid, "R12", vex->guest_R12);
287 (*f)(tid, "R13", vex->guest_R13);
288 (*f)(tid, "R14", vex->guest_R14);
289 #elif defined(VGA_s390x)
290 (*f)(tid, "r0" , vex->guest_r0 );
291 (*f)(tid, "r1" , vex->guest_r1 );
292 (*f)(tid, "r2" , vex->guest_r2 );
293 (*f)(tid, "r3" , vex->guest_r3 );
294 (*f)(tid, "r4" , vex->guest_r4 );
295 (*f)(tid, "r5" , vex->guest_r5 );
296 (*f)(tid, "r6" , vex->guest_r6 );
297 (*f)(tid, "r7" , vex->guest_r7 );
298 (*f)(tid, "r8" , vex->guest_r8 );
299 (*f)(tid, "r9" , vex->guest_r9 );
300 (*f)(tid, "r10", vex->guest_r10);
301 (*f)(tid, "r11", vex->guest_r11);
302 (*f)(tid, "r12", vex->guest_r12);
303 (*f)(tid, "r13", vex->guest_r13);
304 (*f)(tid, "r14", vex->guest_r14);
305 (*f)(tid, "r15", vex->guest_r15);
306 #elif defined(VGA_mips32) || defined(VGA_mips64)
307 (*f)(tid, "r0" , vex->guest_r0 );
308 (*f)(tid, "r1" , vex->guest_r1 );
309 (*f)(tid, "r2" , vex->guest_r2 );
310 (*f)(tid, "r3" , vex->guest_r3 );
311 (*f)(tid, "r4" , vex->guest_r4 );
312 (*f)(tid, "r5" , vex->guest_r5 );
313 (*f)(tid, "r6" , vex->guest_r6 );
314 (*f)(tid, "r7" , vex->guest_r7 );
315 (*f)(tid, "r8" , vex->guest_r8 );
316 (*f)(tid, "r9" , vex->guest_r9 );
317 (*f)(tid, "r10", vex->guest_r10);
318 (*f)(tid, "r11", vex->guest_r11);
319 (*f)(tid, "r12", vex->guest_r12);
320 (*f)(tid, "r13", vex->guest_r13);
321 (*f)(tid, "r14", vex->guest_r14);
322 (*f)(tid, "r15", vex->guest_r15);
323 (*f)(tid, "r16", vex->guest_r16);
324 (*f)(tid, "r17", vex->guest_r17);
325 (*f)(tid, "r18", vex->guest_r18);
326 (*f)(tid, "r19", vex->guest_r19);
327 (*f)(tid, "r20", vex->guest_r20);
328 (*f)(tid, "r21", vex->guest_r21);
329 (*f)(tid, "r22", vex->guest_r22);
330 (*f)(tid, "r23", vex->guest_r23);
331 (*f)(tid, "r24", vex->guest_r24);
332 (*f)(tid, "r25", vex->guest_r25);
333 (*f)(tid, "r26", vex->guest_r26);
334 (*f)(tid, "r27", vex->guest_r27);
335 (*f)(tid, "r28", vex->guest_r28);
336 (*f)(tid, "r29", vex->guest_r29);
337 (*f)(tid, "r30", vex->guest_r30);
338 (*f)(tid, "r31", vex->guest_r31);
339 #elif defined(VGA_arm64)
340 (*f)(tid, "x0" , vex->guest_X0 );
341 (*f)(tid, "x1" , vex->guest_X1 );
342 (*f)(tid, "x2" , vex->guest_X2 );
343 (*f)(tid, "x3" , vex->guest_X3 );
344 (*f)(tid, "x4" , vex->guest_X4 );
345 (*f)(tid, "x5" , vex->guest_X5 );
346 (*f)(tid, "x6" , vex->guest_X6 );
347 (*f)(tid, "x7" , vex->guest_X7 );
348 (*f)(tid, "x8" , vex->guest_X8 );
349 (*f)(tid, "x9" , vex->guest_X9 );
350 (*f)(tid, "x10", vex->guest_X10);
351 (*f)(tid, "x11", vex->guest_X11);
352 (*f)(tid, "x12", vex->guest_X12);
353 (*f)(tid, "x13", vex->guest_X13);
354 (*f)(tid, "x14", vex->guest_X14);
355 (*f)(tid, "x15", vex->guest_X15);
356 (*f)(tid, "x16", vex->guest_X16);
357 (*f)(tid, "x17", vex->guest_X17);
358 (*f)(tid, "x18", vex->guest_X18);
359 (*f)(tid, "x19", vex->guest_X19);
360 (*f)(tid, "x20", vex->guest_X20);
361 (*f)(tid, "x21", vex->guest_X21);
362 (*f)(tid, "x22", vex->guest_X22);
363 (*f)(tid, "x23", vex->guest_X23);
364 (*f)(tid, "x24", vex->guest_X24);
365 (*f)(tid, "x25", vex->guest_X25);
366 (*f)(tid, "x26", vex->guest_X26);
367 (*f)(tid, "x27", vex->guest_X27);
368 (*f)(tid, "x28", vex->guest_X28);
369 (*f)(tid, "x29", vex->guest_X29);
370 (*f)(tid, "x30", vex->guest_X30);
371 #else
372 # error Unknown arch
373 #endif
377 void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
379 ThreadId tid;
381 for (tid = 1; tid < VG_N_THREADS; tid++) {
382 if (VG_(is_valid_tid)(tid)
383 || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
384 // live thread or thread instructed to die by another thread that
385 // called exit.
386 apply_to_GPs_of_tid(tid, f);
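/* Iterator over the stacks of all live threads: call
   VG_(thread_stack_reset_iter) once, then VG_(thread_stack_next) repeatedly
   until it returns False. */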
391 void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
393 *tid = (ThreadId)(-1);
396 Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
397 /*OUT*/Addr* stack_min,
398 /*OUT*/Addr* stack_max)
400 ThreadId i;
401 for (i = (*tid)+1; i < VG_N_THREADS; i++) {
402 if (i == VG_INVALID_THREADID)
403 continue;
404 if (VG_(threads)[i].status != VgTs_Empty) {
405 *tid = i;
406 *stack_min = VG_(get_SP)(i);
407 *stack_max = VG_(threads)[i].client_stack_highest_byte;
408 return True;
411 return False;
414 Addr VG_(thread_get_stack_max)(ThreadId tid)
416 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
417 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
418 return VG_(threads)[tid].client_stack_highest_byte;
421 SizeT VG_(thread_get_stack_size)(ThreadId tid)
423 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
424 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
425 return VG_(threads)[tid].client_stack_szB;
428 Addr VG_(thread_get_altstack_min)(ThreadId tid)
430 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
431 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
432 return (Addr)VG_(threads)[tid].altstack.ss_sp;
435 SizeT VG_(thread_get_altstack_size)(ThreadId tid)
437 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
438 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
439 return VG_(threads)[tid].altstack.ss_size;
442 //-------------------------------------------------------------
443 /* Details about the capabilities of the underlying (host) CPU. These
444 details are acquired by (1) enquiring with the CPU at startup, or
445 (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
446 line size). It's a bit nasty in the sense that there's no obvious
447 way to stop uses of some of this info before it's ready to go.
448 See pub_core_machine.h for more information about that.
450 VG_(machine_get_hwcaps) may use signals (although it attempts to
451 leave signal state unchanged) and therefore should only be
452 called before m_main sets up the client's signal state.
455 /* --------- State --------- */
456 static Bool hwcaps_done = False;
458 /* --- all archs --- */
459 static VexArch va = VexArch_INVALID;
460 static VexArchInfo vai;
462 #if defined(VGA_x86)
463 UInt VG_(machine_x86_have_mxcsr) = 0;
464 #endif
465 #if defined(VGA_ppc32)
466 UInt VG_(machine_ppc32_has_FP) = 0;
467 UInt VG_(machine_ppc32_has_VMX) = 0;
468 #endif
469 #if defined(VGA_ppc64be) || defined(VGA_ppc64le)
470 ULong VG_(machine_ppc64_has_VMX) = 0;
471 #endif
472 #if defined(VGA_arm)
473 Int VG_(machine_arm_archlevel) = 4;
474 #endif
477 /* For hwcaps detection on ppc32/64, s390x, arm and mips we'll need to do SIGILL
478 testing, so we need a VG_MINIMAL_JMP_BUF. */
479 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
480 || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32) || defined(VGA_mips64)
481 #include "pub_core_libcsetjmp.h"
482 static VG_MINIMAL_JMP_BUF(env_unsup_insn);
483 static void handler_unsup_insn ( Int x ) {
484 VG_MINIMAL_LONGJMP(env_unsup_insn);
486 #endif
489 /* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
490 * handlers are installed. Determines the sizes affected by dcbz
491 * and dcbzl instructions and updates the given VexArchInfo structure
492 * accordingly.
494 * Not very defensive: assumes that as long as the dcbz/dcbzl
495 * instructions don't raise a SIGILL, they will zero an aligned,
496 * contiguous block of memory of a sensible size. */
497 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
498 static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
500 Int dcbz_szB = 0;
501 Int dcbzl_szB;
502 # define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
503 char test_block[4*MAX_DCBZL_SZB];
504 char *aligned = test_block;
505 Int i;
507 /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
508 aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
509 vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);
511 /* dcbz often clears 32B, although sometimes whatever the native cache
512 * block size is */
513 VG_(memset)(test_block, 0xff, sizeof(test_block));
514 __asm__ __volatile__("dcbz 0,%0"
515 : /*out*/
516 : "r" (aligned) /*in*/
517 : "memory" /*clobber*/);
518 for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
519 if (!test_block[i])
520 ++dcbz_szB;
522 vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);
524 /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
525 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
526 dcbzl_szB = 0; /* indicates unsupported */
528 else {
529 VG_(memset)(test_block, 0xff, sizeof(test_block));
530 /* some older assemblers won't understand the dcbzl instruction
531 * variant, so we directly emit the instruction ourselves */
532 __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
533 : /*out*/
534 : "r" (aligned) /*in*/
535 : "memory", "r9" /*clobber*/);
536 for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
537 if (!test_block[i])
538 ++dcbzl_szB;
540 vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
543 arch_info->ppc_dcbz_szB = dcbz_szB;
544 arch_info->ppc_dcbzl_szB = dcbzl_szB;
546 VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
547 dcbz_szB, dcbzl_szB);
548 # undef MAX_DCBZL_SZB
550 #endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */
552 #ifdef VGA_s390x
554 /* Read /proc/cpuinfo. Look for lines like these
556 processor 0: version = FF, identification = 0117C9, machine = 2064
558 and return the machine model. If the machine model could not be determined
559 or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */
561 static UInt VG_(get_machine_model)(void)
563 static struct model_map {
564 const HChar name[5];
565 UInt id;
566 } model_map[] = {
567 { "2064", VEX_S390X_MODEL_Z900 },
568 { "2066", VEX_S390X_MODEL_Z800 },
569 { "2084", VEX_S390X_MODEL_Z990 },
570 { "2086", VEX_S390X_MODEL_Z890 },
571 { "2094", VEX_S390X_MODEL_Z9_EC },
572 { "2096", VEX_S390X_MODEL_Z9_BC },
573 { "2097", VEX_S390X_MODEL_Z10_EC },
574 { "2098", VEX_S390X_MODEL_Z10_BC },
575 { "2817", VEX_S390X_MODEL_Z196 },
576 { "2818", VEX_S390X_MODEL_Z114 },
577 { "2827", VEX_S390X_MODEL_ZEC12 },
578 { "2828", VEX_S390X_MODEL_ZBC12 },
579 { "2964", VEX_S390X_MODEL_Z13 },
580 { "2965", VEX_S390X_MODEL_Z13S },
581 { "3906", VEX_S390X_MODEL_Z14 },
582 { "3907", VEX_S390X_MODEL_Z14_ZR1 },
585 Int model, n, fh;
586 SysRes fd;
587 SizeT num_bytes, file_buf_size;
588 HChar *p, *m, *model_name, *file_buf;
590 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
591 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
592 if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;
594 fh = sr_Res(fd);
596 /* Determine the size of /proc/cpuinfo.
597 Work around broken-ness in /proc file system implementation.
598 fstat returns a zero size for /proc/cpuinfo although it is
599 claimed to be a regular file. */
600 num_bytes = 0;
601 file_buf_size = 1000;
602 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
603 while (42) {
604 n = VG_(read)(fh, file_buf, file_buf_size);
605 if (n < 0) break;
607 num_bytes += n;
608 if (n < file_buf_size) break; /* reached EOF */
611 if (n < 0) num_bytes = 0; /* read error; ignore contents */
613 if (num_bytes > file_buf_size) {
614 VG_(free)( file_buf );
615 VG_(lseek)( fh, 0, VKI_SEEK_SET );
616 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
617 n = VG_(read)( fh, file_buf, num_bytes );
618 if (n < 0) num_bytes = 0;
621 file_buf[num_bytes] = '\0';
622 VG_(close)(fh);
624 /* Parse file */
625 model = VEX_S390X_MODEL_UNKNOWN;
626 for (p = file_buf; *p; ++p) {
627 /* Beginning of line */
628 if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;
630 m = VG_(strstr)( p, "machine" );
631 if (m == NULL) continue;
633 p = m + sizeof "machine" - 1;
634 while ( VG_(isspace)( *p ) || *p == '=') {
635 if (*p == '\n') goto next_line;
636 ++p;
639 model_name = p;
640 for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
641 struct model_map *mm = model_map + n;
642 SizeT len = VG_(strlen)( mm->name );
643 if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
644 VG_(isspace)( model_name[len] )) {
645 if (mm->id < model) model = mm->id;
646 p = model_name + len;
647 break;
650 /* Skip until end-of-line */
651 while (*p != '\n')
652 ++p;
653 next_line: ;
656 VG_(free)( file_buf );
657 VG_(debugLog)(1, "machine", "model = %s\n",
658 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
659 : model_map[model].name);
660 return model;
663 #endif /* defined(VGA_s390x) */
665 #if defined(VGA_mips32) || defined(VGA_mips64)
668 * Initialize hwcaps by parsing /proc/cpuinfo. Returns False if it cannot
669 * determine what CPU it is (it searches only for the models that are or may be
670 * supported by Valgrind).
672 static Bool VG_(parse_cpuinfo)(void)
674 const char *search_Broadcom_str = "cpu model\t\t: Broadcom";
675 const char *search_Cavium_str = "cpu model\t\t: Cavium";
676 const char *search_Ingenic_str = "cpu model\t\t: Ingenic";
677 const char *search_Loongson_str = "cpu model\t\t: ICT Loongson";
678 const char *search_MIPS_str = "cpu model\t\t: MIPS";
679 const char *search_Netlogic_str = "cpu model\t\t: Netlogic";
681 Int n, fh;
682 SysRes fd;
683 SizeT num_bytes, file_buf_size;
684 HChar *file_buf, *isa;
686 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
687 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
688 if ( sr_isError(fd) ) return False;
690 fh = sr_Res(fd);
692 /* Determine the size of /proc/cpuinfo.
693 Work around broken-ness in /proc file system implementation.
694 fstat returns a zero size for /proc/cpuinfo although it is
695 claimed to be a regular file. */
696 num_bytes = 0;
697 file_buf_size = 1000;
698 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
699 while (42) {
700 n = VG_(read)(fh, file_buf, file_buf_size);
701 if (n < 0) break;
703 num_bytes += n;
704 if (n < file_buf_size) break; /* reached EOF */
707 if (n < 0) num_bytes = 0; /* read error; ignore contents */
709 if (num_bytes > file_buf_size) {
710 VG_(free)( file_buf );
711 VG_(lseek)( fh, 0, VKI_SEEK_SET );
712 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
713 n = VG_(read)( fh, file_buf, num_bytes );
714 if (n < 0) num_bytes = 0;
717 file_buf[num_bytes] = '\0';
718 VG_(close)(fh);
720 /* Parse file */
721 if (VG_(strstr)(file_buf, search_Broadcom_str) != NULL)
722 vai.hwcaps = VEX_PRID_COMP_BROADCOM;
723 else if (VG_(strstr)(file_buf, search_Netlogic_str) != NULL)
724 vai.hwcaps = VEX_PRID_COMP_NETLOGIC;
725 else if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
726 vai.hwcaps = VEX_PRID_COMP_CAVIUM;
727 else if (VG_(strstr)(file_buf, search_MIPS_str) != NULL)
728 vai.hwcaps = VEX_PRID_COMP_MIPS;
729 else if (VG_(strstr)(file_buf, search_Ingenic_str) != NULL)
730 vai.hwcaps = VEX_PRID_COMP_INGENIC_E1;
731 else if (VG_(strstr)(file_buf, search_Loongson_str) != NULL)
732 vai.hwcaps = (VEX_PRID_COMP_LEGACY | VEX_PRID_IMP_LOONGSON_64);
733 else {
734 /* Did not find string in the proc file. */
735 vai.hwcaps = 0;
736 VG_(free)(file_buf);
737 return False;
740 isa = VG_(strstr)(file_buf, "isa\t\t\t: ");
742 if (NULL != isa) {
743 if (VG_(strstr) (isa, "mips32r1") != NULL)
744 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
745 if (VG_(strstr) (isa, "mips32r2") != NULL)
746 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
747 if (VG_(strstr) (isa, "mips32r6") != NULL)
748 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R6;
749 if (VG_(strstr) (isa, "mips64r1") != NULL)
750 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R1;
751 if (VG_(strstr) (isa, "mips64r2") != NULL)
752 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2;
753 if (VG_(strstr) (isa, "mips64r6") != NULL)
754 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R6;
757 * TODO(petarj): Remove this Cavium workaround once Linux kernel folks
758 * decide to change incorrect settings in
759 * mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h.
760 * The current settings show mips32r1, mips32r2 and mips64r1 as
761 * unsupported ISAs by Cavium MIPS CPUs.
763 if (VEX_MIPS_COMP_ID(vai.hwcaps) == VEX_PRID_COMP_CAVIUM) {
764 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1 | VEX_MIPS_CPU_ISA_M32R2 |
765 VEX_MIPS_CPU_ISA_M64R1;
767 } else {
769 * Kernel does not provide information about supported ISAs.
770 * Populate the isa level flags based on the CPU model. That is our
771 * best guess.
773 switch (VEX_MIPS_COMP_ID(vai.hwcaps)) {
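/* Note the deliberate case fall-throughs below: each CPU family picks up
   its own ISA levels plus all the lower levels added by the cases that
   follow it. */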
774 case VEX_PRID_COMP_CAVIUM:
775 case VEX_PRID_COMP_NETLOGIC:
776 vai.hwcaps |= (VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1);
777 /* fallthrough */
778 case VEX_PRID_COMP_INGENIC_E1:
779 case VEX_PRID_COMP_MIPS:
780 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
781 /* fallthrough */
782 case VEX_PRID_COMP_BROADCOM:
783 vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
784 break;
785 case VEX_PRID_COMP_LEGACY:
786 if ((VEX_MIPS_PROC_ID(vai.hwcaps) == VEX_PRID_IMP_LOONGSON_64))
787 vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1 |
788 VEX_MIPS_CPU_ISA_M32R2 | VEX_MIPS_CPU_ISA_M32R1;
789 break;
790 default:
791 break;
794 VG_(free)(file_buf);
795 return True;
798 #endif /* defined(VGA_mips32) || defined(VGA_mips64) */
800 #if defined(VGP_arm64_linux)
802 /* Check to see whether we are running on a Cavium core, and if so auto-enable
803 the fallback LLSC implementation. See #369459. */
805 static Bool VG_(parse_cpuinfo)(void)
807 const char *search_Cavium_str = "CPU implementer\t: 0x43";
809 Int n, fh;
810 SysRes fd;
811 SizeT num_bytes, file_buf_size;
812 HChar *file_buf;
814 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
815 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
816 if ( sr_isError(fd) ) return False;
818 fh = sr_Res(fd);
820 /* Determine the size of /proc/cpuinfo.
821 Work around broken-ness in /proc file system implementation.
822 fstat returns a zero size for /proc/cpuinfo although it is
823 claimed to be a regular file. */
824 num_bytes = 0;
825 file_buf_size = 1000;
826 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
827 while (42) {
828 n = VG_(read)(fh, file_buf, file_buf_size);
829 if (n < 0) break;
831 num_bytes += n;
832 if (n < file_buf_size) break; /* reached EOF */
835 if (n < 0) num_bytes = 0; /* read error; ignore contents */
837 if (num_bytes > file_buf_size) {
838 VG_(free)( file_buf );
839 VG_(lseek)( fh, 0, VKI_SEEK_SET );
840 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
841 n = VG_(read)( fh, file_buf, num_bytes );
842 if (n < 0) num_bytes = 0;
845 file_buf[num_bytes] = '\0';
846 VG_(close)(fh);
848 /* Parse file */
849 if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
850 vai.arm64_requires_fallback_LLSC = True;
852 VG_(free)(file_buf);
853 return True;
856 #endif /* defined(VGP_arm64_linux) */
858 Bool VG_(machine_get_hwcaps)( void )
860 vg_assert(hwcaps_done == False);
861 hwcaps_done = True;
863 // Whack default settings into vai, so that we only need to fill in
864 // any interesting bits.
865 LibVEX_default_VexArchInfo(&vai);
867 #if defined(VGA_x86)
868 { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
869 UInt eax, ebx, ecx, edx, max_extended;
870 HChar vstr[13];
871 vstr[0] = 0;
873 if (!VG_(has_cpuid)())
874 /* we can't do cpuid at all. Give up. */
875 return False;
877 VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
878 if (eax < 1)
879 /* we can't ask for cpuid(x) for x > 0. Give up. */
880 return False;
882 /* Get processor ID string, and max basic/extended index
883 values. */
884 VG_(memcpy)(&vstr[0], &ebx, 4);
885 VG_(memcpy)(&vstr[4], &edx, 4);
886 VG_(memcpy)(&vstr[8], &ecx, 4);
887 vstr[12] = 0;
889 VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
890 max_extended = eax;
892 /* get capabilities bits into edx */
893 VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
895 have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
896 have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
897 have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */
899 /* cmpxchg8b is a minimum requirement now; if we don't have it we
900 must simply give up. But all CPUs since Pentium-I have it, so
901 that doesn't seem like much of a restriction. */
902 have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
903 if (!have_cx8)
904 return False;
906 /* Figure out if this is an AMD that can do MMXEXT. */
907 have_mmxext = False;
908 if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
909 && max_extended >= 0x80000001) {
910 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
911 /* Some older AMD processors support a sse1 subset (Integer SSE). */
912 have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
915 /* Figure out if this is an AMD or Intel that can do LZCNT. */
916 have_lzcnt = False;
917 if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
918 || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
919 && max_extended >= 0x80000001) {
920 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
921 have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
924 /* Intel processors don't define the mmxext extension, but since it
925 is just an sse1 subset, always define it when we have sse1. */
926 if (have_sse1)
927 have_mmxext = True;
929 va = VexArchX86;
930 vai.endness = VexEndnessLE;
932 if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
933 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
934 vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
935 vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
936 vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
937 if (have_lzcnt)
938 vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
939 VG_(machine_x86_have_mxcsr) = 1;
940 } else if (have_sse2 && have_sse1 && have_mmxext) {
941 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
942 vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
943 vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
944 if (have_lzcnt)
945 vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
946 VG_(machine_x86_have_mxcsr) = 1;
947 } else if (have_sse1 && have_mmxext) {
948 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
949 vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
950 VG_(machine_x86_have_mxcsr) = 1;
951 } else if (have_mmxext) {
952 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
953 VG_(machine_x86_have_mxcsr) = 0;
954 } else {
955 vai.hwcaps = 0; /*baseline - no sse at all*/
956 VG_(machine_x86_have_mxcsr) = 0;
959 VG_(machine_get_cache_info)(&vai);
961 return True;
964 #elif defined(VGA_amd64)
965 { Bool have_sse3, have_ssse3, have_cx8, have_cx16;
966 Bool have_lzcnt, have_avx, have_bmi, have_avx2;
967 Bool have_rdtscp, have_rdrand, have_f16c;
968 UInt eax, ebx, ecx, edx, max_basic, max_extended;
969 ULong xgetbv_0 = 0;
970 HChar vstr[13];
971 vstr[0] = 0;
973 have_sse3 = have_ssse3 = have_cx8 = have_cx16
974 = have_lzcnt = have_avx = have_bmi = have_avx2
975 = have_rdtscp = have_rdrand = have_f16c = False;
977 eax = ebx = ecx = edx = max_basic = max_extended = 0;
979 if (!VG_(has_cpuid)())
980 /* we can't do cpuid at all. Give up. */
981 return False;
983 VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
984 max_basic = eax;
985 if (max_basic < 1)
986 /* we can't ask for cpuid(x) for x > 0. Give up. */
987 return False;
989 /* Get processor ID string, and max basic/extended index
990 values. */
991 VG_(memcpy)(&vstr[0], &ebx, 4);
992 VG_(memcpy)(&vstr[4], &edx, 4);
993 VG_(memcpy)(&vstr[8], &ecx, 4);
994 vstr[12] = 0;
996 VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
997 max_extended = eax;
999 /* get capabilities bits into edx */
1000 VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
1002 // we assume that SSE1 and SSE2 are available by default
1003 have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */
1004 have_ssse3 = (ecx & (1<<9)) != 0; /* True => have Sup SSE3 insns */
1005 // fma is ecx:12
1006 // sse41 is ecx:19
1007 // sse42 is ecx:20
1008 // xsave is ecx:26
1009 // osxsave is ecx:27
1010 // avx is ecx:28
1011 have_f16c = (ecx & (1<<29)) != 0; /* True => have F16C insns */
1012 have_rdrand = (ecx & (1<<30)) != 0; /* True => have RDRAND insns */
1014 have_avx = False;
1015 /* have_fma = False; */
1016 if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
1017 /* The processor supports AVX and XSAVE, and the OS has enabled
1018 XGETBV (OSXSAVE), so we can query XCR0 below. */
1019 ULong w;
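/* XGETBV with ECX=0 reads XCR0.  It is emitted as raw bytes (0F 01 D0)
   rather than by mnemonic, presumably so that older assemblers can still
   build this file. */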
1020 __asm__ __volatile__("movq $0,%%rcx ; "
1021 ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
1022 "movq %%rax,%0"
1023 :/*OUT*/"=r"(w) :/*IN*/
1024 :/*TRASH*/"rdx","rcx","rax");
1025 xgetbv_0 = w;
1026 if ((xgetbv_0 & 7) == 7) {
1027 /* Only say we have AVX if the XSAVE-allowable
1028 bitfield-mask allows x87, SSE and AVX state. We could
1029 actually run with a more restrictive XGETBV(0) value,
1030 but VEX's implementation of XSAVE and XRSTOR assumes
1031 that all 3 bits are enabled.
1033 Also, the VEX implementation of XSAVE/XRSTOR assumes that
1034 state component [2] (the YMM high halves) are located in
1035 the XSAVE image at offsets 576 .. 831. So we have to
1036 check that here before declaring AVX to be supported. */
1037 UInt eax2, ebx2, ecx2, edx2;
1038 VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2);
1039 if (ebx2 == 576 && eax2 == 256) {
1040 have_avx = True;
1042 /* have_fma = (ecx & (1<<12)) != 0; */
1043 /* have_fma: Probably correct, but gcc complains due to
1044 unusedness. */
1048 /* cmpxchg8b is a minimum requirement now; if we don't have it we
1049 must simply give up. But all CPUs since Pentium-I have it, so
1050 that doesn't seem like much of a restriction. */
1051 have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
1052 if (!have_cx8)
1053 return False;
1055 /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
1056 have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */
1058 /* Figure out if this CPU can do LZCNT. */
1059 have_lzcnt = False;
1060 if (max_extended >= 0x80000001) {
1061 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
1062 have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
1065 /* Can we do RDTSCP? */
1066 have_rdtscp = False;
1067 if (max_extended >= 0x80000001) {
1068 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
1069 have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
1072 /* Check for BMI1 and AVX2, provided we have AVX1 (plus OS support). */
1073 have_bmi = False;
1074 have_avx2 = False;
1075 if (have_avx && max_basic >= 7) {
1076 VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
1077 have_bmi = (ebx & (1<<3)) != 0; /* True => have BMI1 */
1078 have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
1081 /* Sanity check for RDRAND and F16C. These don't actually *need* AVX, but
1082 it's convenient to restrict them to the AVX case since the simulated
1083 CPUID we'll offer them on has AVX as a base. */
1084 if (!have_avx) {
1085 have_f16c = False;
1086 have_rdrand = False;
1089 va = VexArchAMD64;
1090 vai.endness = VexEndnessLE;
1091 vai.hwcaps = (have_sse3 ? VEX_HWCAPS_AMD64_SSE3 : 0)
1092 | (have_ssse3 ? VEX_HWCAPS_AMD64_SSSE3 : 0)
1093 | (have_cx16 ? VEX_HWCAPS_AMD64_CX16 : 0)
1094 | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0)
1095 | (have_avx ? VEX_HWCAPS_AMD64_AVX : 0)
1096 | (have_bmi ? VEX_HWCAPS_AMD64_BMI : 0)
1097 | (have_avx2 ? VEX_HWCAPS_AMD64_AVX2 : 0)
1098 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0)
1099 | (have_f16c ? VEX_HWCAPS_AMD64_F16C : 0)
1100 | (have_rdrand ? VEX_HWCAPS_AMD64_RDRAND : 0);
1102 VG_(machine_get_cache_info)(&vai);
1104 return True;
1107 #elif defined(VGA_ppc32)
1109 /* Find out which subset of the ppc32 instruction set is supported by
1110 verifying whether various ppc32 instructions generate a SIGILL
1111 or a SIGFPE. An alternative approach is to check the AT_HWCAP and
1112 AT_PLATFORM entries in the ELF auxiliary table -- see also
1113 the_iifii.client_auxv in m_main.c.
1115 vki_sigset_t saved_set, tmp_set;
1116 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1117 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
1119 volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
1120 volatile Bool have_isa_2_07, have_isa_3_0;
1121 Int r;
1123 /* This is a kludge. Really we ought to back-convert saved_act
1124 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1125 since that's a no-op on all ppc32 platforms so far supported,
1126 it's not worth the typing effort. At least include most basic
1127 sanity check: */
1128 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1130 VG_(sigemptyset)(&tmp_set);
1131 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1132 VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1134 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1135 vg_assert(r == 0);
1137 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1138 vg_assert(r == 0);
1139 tmp_sigill_act = saved_sigill_act;
1141 r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1142 vg_assert(r == 0);
1143 tmp_sigfpe_act = saved_sigfpe_act;
1145 /* NODEFER: signal handler does not return (from the kernel's point of
1146 view), hence if it is to successfully catch a signal more than once,
1147 we need the NODEFER flag. */
1148 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1149 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1150 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1151 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1152 r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1153 vg_assert(r == 0);
1155 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1156 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1157 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER;
1158 tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1159 r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1160 vg_assert(r == 0);
1162 /* standard FP insns */
1163 have_F = True;
1164 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1165 have_F = False;
1166 } else {
1167 __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
1170 /* Altivec insns */
1171 have_V = True;
1172 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1173 have_V = False;
1174 } else {
1175 /* Unfortunately some older assemblers don't speak Altivec (or
1176 choose not to), so to be safe we directly emit the 32-bit
1177 word corresponding to "vor 0,0,0". This fixes a build
1178 problem that happens on Debian 3.1 (ppc32), and probably
1179 various other places. */
1180 __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
1183 /* General-Purpose optional (fsqrt, fsqrts) */
1184 have_FX = True;
1185 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1186 have_FX = False;
1187 } else {
1188 __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
1191 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
1192 have_GX = True;
1193 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1194 have_GX = False;
1195 } else {
1196 __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
1199 /* VSX support implies Power ISA 2.06 */
1200 have_VX = True;
1201 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1202 have_VX = False;
1203 } else {
1204 __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
1207 /* Check for Decimal Floating Point (DFP) support. */
1208 have_DFP = True;
1209 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1210 have_DFP = False;
1211 } else {
1212 __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
1215 /* Check for ISA 2.07 support. */
1216 have_isa_2_07 = True;
1217 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1218 have_isa_2_07 = False;
1219 } else {
1220 __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
1223 /* Check for ISA 3.0 support. */
1224 have_isa_3_0 = True;
1225 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1226 have_isa_3_0 = False;
1227 } else {
1228 __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
1231 /* determine dcbz/dcbzl sizes while we still have the signal
1232 * handlers registered */
1233 find_ppc_dcbz_sz(&vai);
1235 r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1236 vg_assert(r == 0);
1237 r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
1238 vg_assert(r == 0);
1239 r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1240 vg_assert(r == 0);
1241 VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
1242 (Int)have_F, (Int)have_V, (Int)have_FX,
1243 (Int)have_GX, (Int)have_VX, (Int)have_DFP,
1244 (Int)have_isa_2_07, (Int)have_isa_3_0);
1245 /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
1246 if (have_V && !have_F)
1247 have_V = False;
1248 if (have_FX && !have_F)
1249 have_FX = False;
1250 if (have_GX && !have_F)
1251 have_GX = False;
1253 VG_(machine_ppc32_has_FP) = have_F ? 1 : 0;
1254 VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;
1256 va = VexArchPPC32;
1257 vai.endness = VexEndnessBE;
1259 vai.hwcaps = 0;
1260 if (have_F) vai.hwcaps |= VEX_HWCAPS_PPC32_F;
1261 if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC32_V;
1262 if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
1263 if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
1264 if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
1265 if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
1266 if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;
1267 if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA3_0;
1269 VG_(machine_get_cache_info)(&vai);
1271 /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
1272 called before we're ready to go. */
1273 return True;
1276 #elif defined(VGA_ppc64be)|| defined(VGA_ppc64le)
1278 /* Same instruction set detection algorithm as for ppc32. */
1279 vki_sigset_t saved_set, tmp_set;
1280 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1281 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
1283 volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
1284 volatile Bool have_isa_2_07, have_isa_3_0;
1285 Int r;
1287 /* This is a kludge. Really we ought to back-convert saved_act
1288 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1289 since that's a no-op on all ppc64 platforms so far supported,
1290 it's not worth the typing effort. At least include most basic
1291 sanity check: */
1292 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1294 VG_(sigemptyset)(&tmp_set);
1295 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1296 VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1298 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1299 vg_assert(r == 0);
1301 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1302 vg_assert(r == 0);
1303 tmp_sigill_act = saved_sigill_act;
1305 VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1306 tmp_sigfpe_act = saved_sigfpe_act;
1308 /* NODEFER: signal handler does not return (from the kernel's point of
1309 view), hence if it is to successfully catch a signal more than once,
1310 we need the NODEFER flag. */
1311 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1312 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1313 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1314 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1315 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1317 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1318 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1319 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER;
1320 tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1321 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1323 /* standard FP insns */
1324 have_F = True;
1325 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1326 have_F = False;
1327 } else {
1328 __asm__ __volatile__("fmr 0,0");
1331 /* Altivec insns */
1332 have_V = True;
1333 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1334 have_V = False;
1335 } else {
1336 __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
1339 /* General-Purpose optional (fsqrt, fsqrts) */
1340 have_FX = True;
1341 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1342 have_FX = False;
1343 } else {
1344 __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
1347 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
1348 have_GX = True;
1349 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1350 have_GX = False;
1351 } else {
1352 __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
1355 /* VSX support implies Power ISA 2.06 */
1356 have_VX = True;
1357 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1358 have_VX = False;
1359 } else {
1360 __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
1363 /* Check for Decimal Floating Point (DFP) support. */
1364 have_DFP = True;
1365 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1366 have_DFP = False;
1367 } else {
1368 __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
1371 /* Check for ISA 2.07 support. */
1372 have_isa_2_07 = True;
1373 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1374 have_isa_2_07 = False;
1375 } else {
1376 __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
1379 /* Check for ISA 3.0 support. */
1380 have_isa_3_0 = True;
1381 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1382 have_isa_3_0 = False;
1383 } else {
1384 __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
1387 /* determine dcbz/dcbzl sizes while we still have the signal
1388 * handlers registered */
1389 find_ppc_dcbz_sz(&vai);
1391 VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1392 VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
1393 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1394 VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
1395 (Int)have_F, (Int)have_V, (Int)have_FX,
1396 (Int)have_GX, (Int)have_VX, (Int)have_DFP,
1397 (Int)have_isa_2_07, (Int)have_isa_3_0);
1398 /* on ppc64be, if we don't even have FP, just give up. */
1399 if (!have_F)
1400 return False;
1402 VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;
1404 va = VexArchPPC64;
1405 # if defined(VKI_LITTLE_ENDIAN)
1406 vai.endness = VexEndnessLE;
1407 # elif defined(VKI_BIG_ENDIAN)
1408 vai.endness = VexEndnessBE;
1409 # else
1410 vai.endness = VexEndness_INVALID;
1411 # endif
1413 vai.hwcaps = 0;
1414 if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC64_V;
1415 if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
1416 if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
1417 if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
1418 if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
1419 if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;
1420 if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_0;
1422 VG_(machine_get_cache_info)(&vai);
1424 /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
1425 called before we're ready to go. */
1426 return True;
1429 #elif defined(VGA_s390x)
1431 # include "libvex_s390x_common.h"
1434 /* Instruction set detection code borrowed from ppc above. */
1435 vki_sigset_t saved_set, tmp_set;
1436 vki_sigaction_fromK_t saved_sigill_act;
1437 vki_sigaction_toK_t tmp_sigill_act;
1439 volatile Bool have_LDISP, have_STFLE;
1440 Int i, r, model;
1442 /* If the model is "unknown" don't treat this as an error. Assume
1443 this is a brand-new machine model for which we don't have the
1444 identification yet. Keeping fingers crossed. */
1445 model = VG_(get_machine_model)();
1447 /* Unblock SIGILL and stash away the old action for that signal */
1448 VG_(sigemptyset)(&tmp_set);
1449 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1451 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1452 vg_assert(r == 0);
1454 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1455 vg_assert(r == 0);
1456 tmp_sigill_act = saved_sigill_act;
1458 /* NODEFER: signal handler does not return (from the kernel's point of
1459 view), hence if it is to successfully catch a signal more than once,
1460 we need the NODEFER flag. */
1461 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1462 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1463 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1464 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1465 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1467 /* Determine hwcaps. Note, we cannot use the stfle insn because it
1468 is not supported on z900. */
1470 have_LDISP = True;
1471 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1472 have_LDISP = False;
1473 } else {
1474 /* BASR loads the address of the next insn into r1. Needed to avoid
1475 a segfault in XY. */
1476 __asm__ __volatile__("basr %%r1,%%r0\n\t"
1477 ".long 0xe3001000\n\t" /* XY 0,0(%r1) */
1478 ".short 0x0057" : : : "r0", "r1", "cc", "memory");
1481 /* Check availability of STFLE. If available store facility bits
1482 in hoststfle. */
1483 ULong hoststfle[S390_NUM_FACILITY_DW];
1485 for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
1486 hoststfle[i] = 0;
1488 have_STFLE = True;
1489 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1490 have_STFLE = False;
1491 } else {
1492 register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;
1494 __asm__ __volatile__(" .insn s,0xb2b00000,%0\n" /* stfle */
1495 : "=m" (hoststfle), "+d"(reg0)
1496 : : "cc", "memory");
1499 /* Restore signals */
1500 r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1501 vg_assert(r == 0);
1502 r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1503 vg_assert(r == 0);
1504 va = VexArchS390X;
1505 vai.endness = VexEndnessBE;
1507 vai.hwcaps = model;
1508 if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
1509 if (have_LDISP) {
1510 /* Use long displacement only on machines >= z990. For all other
1511 machines it is millicoded and therefore slow. */
1512 if (model >= VEX_S390X_MODEL_Z990)
1513 vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
1516 /* Detect presence of certain facilities using the STFLE insn.
1517 Note, that these facilities were introduced at the same time or later
1518 as STFLE, so the absence of STFLE implies the absence of the facility
1519 we're trying to detect. */
1520 struct fac_hwcaps_map {
1521 UInt installed;
1522 UInt facility_bit;
1523 UInt hwcaps_bit;
1524 const HChar name[6]; // may need adjustment for new facility names
1525 } fac_hwcaps[] = {
1526 { False, S390_FAC_EIMM, VEX_HWCAPS_S390X_EIMM, "EIMM" },
1527 { False, S390_FAC_GIE, VEX_HWCAPS_S390X_GIE, "GIE" },
1528 { False, S390_FAC_DFP, VEX_HWCAPS_S390X_DFP, "DFP" },
1529 { False, S390_FAC_FPSE, VEX_HWCAPS_S390X_FGX, "FGX" },
1530 { False, S390_FAC_ETF2, VEX_HWCAPS_S390X_ETF2, "ETF2" },
1531 { False, S390_FAC_ETF3, VEX_HWCAPS_S390X_ETF3, "ETF3" },
1532 { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
1533 { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
1534 { False, S390_FAC_LSC, VEX_HWCAPS_S390X_LSC, "LSC" },
1535 { False, S390_FAC_PFPO, VEX_HWCAPS_S390X_PFPO, "PFPO" },
1536 { False, S390_FAC_VX, VEX_HWCAPS_S390X_VX, "VX" },
1537 { False, S390_FAC_MSA5, VEX_HWCAPS_S390X_MSA5, "MSA5" },
1538 { False, S390_FAC_MI2, VEX_HWCAPS_S390X_MI2, "MI2" },
1541 /* Set hwcaps according to the detected facilities */
1542 UChar dw_number = 0;
1543 UChar fac_bit = 0;
1544 for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
1545 vg_assert(fac_hwcaps[i].facility_bit <= 191); // for now
1546 dw_number = fac_hwcaps[i].facility_bit / 64;
1547 fac_bit = fac_hwcaps[i].facility_bit % 64;
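/* STFLE numbers facility bits from the left: bit 0 is the most significant
   bit of doubleword 0, hence the (63 - fac_bit) shift below. */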
1548 if (hoststfle[dw_number] & (1ULL << (63 - fac_bit))) {
1549 fac_hwcaps[i].installed = True;
1550 vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
1554 /* Build up a string showing the probed-for facilities */
1555 HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
1556 (sizeof fac_hwcaps[0].name + 3) + // %s %d
1557 7 + 1 + 4 + 2 // machine %4d
1558 + 1]; // \0
1559 HChar *p = fac_str;
1560 p += VG_(sprintf)(p, "machine %4d ", model);
1561 for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
1562 p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name,
1563 fac_hwcaps[i].installed);
1565 *p++ = '\0';
1567 VG_(debugLog)(1, "machine", "%s\n", fac_str);
1568 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
1570 VG_(machine_get_cache_info)(&vai);
1572 return True;
1575 #elif defined(VGA_arm)
1577 /* Same instruction set detection algorithm as for ppc32. */
1578 vki_sigset_t saved_set, tmp_set;
1579 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1580 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
1582 volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON, have_V8;
1583 volatile Int archlevel;
1584 Int r;
1586 /* This is a kludge. Really we ought to back-convert saved_act
1587 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1588 since that's a no-op on all arm platforms so far supported,
1589 it's not worth the typing effort. At least include most basic
1590 sanity check: */
1591 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1593 VG_(sigemptyset)(&tmp_set);
1594 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1595 VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1597 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1598 vg_assert(r == 0);
1600 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1601 vg_assert(r == 0);
1602 tmp_sigill_act = saved_sigill_act;
1604 VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1605 tmp_sigfpe_act = saved_sigfpe_act;
1607 /* NODEFER: signal handler does not return (from the kernel's point of
1608 view), hence if it is to successfully catch a signal more than once,
1609 we need the NODEFER flag. */
1610 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1611 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1612 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1613 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1614 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1616 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1617 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1618 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER;
1619 tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1620 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1622 /* VFP insns */
1623 have_VFP = True;
1624 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1625 have_VFP = False;
1626 } else {
1627 __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
1629 /* There are several generations of the VFP extension, but they differ
1630 very little, so for now we do not distinguish between them. */
1631 have_VFP2 = have_VFP;
1632 have_VFP3 = have_VFP;
1634 /* NEON insns */
1635 have_NEON = True;
1636 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1637 have_NEON = False;
1638 } else {
1639 __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
1642 /* ARM architecture level */
1643 archlevel = 5; /* v5 will be base level */
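/* Probe upwards from the v5 baseline: try a v7-only insn (PLI), then a
   v6-only insn (PKHBT); each probe drops back to v5 if the insn traps. */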
1644 if (archlevel < 7) {
1645 archlevel = 7;
1646 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1647 archlevel = 5;
1648 } else {
1649 __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
1652 if (archlevel < 6) {
1653 archlevel = 6;
1654 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1655 archlevel = 5;
1656 } else {
1657 __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
1661 /* ARMv8 insns */
1662 have_V8 = True;
1663 if (archlevel == 7) {
1664 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1665 have_V8 = False;
1666 } else {
1667 __asm__ __volatile__(".word 0xF3044F54"); /* VMAXNM.F32 q2,q2,q2 */
1669 if (have_V8 && have_NEON && have_VFP3) {
1670 archlevel = 8;
1671 }
1672 }
1674 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
1675 VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
1676 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1677 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1678 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1680 VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
1681 archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
1682 (Int)have_NEON);
1684 VG_(machine_arm_archlevel) = archlevel;
1686 va = VexArchARM;
1687 vai.endness = VexEndnessLE;
1689 vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
1690 if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
1691 if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
1692 if (have_VFP) vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
1693 if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
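/* For illustration only: on a hypothetical ARMv7 host where the VFP and
   NEON probes above all succeeded, the word just built would be
      VEX_ARM_ARCHLEVEL(7) | VEX_HWCAPS_ARM_VFP | VEX_HWCAPS_ARM_VFP2
                           | VEX_HWCAPS_ARM_VFP3 | VEX_HWCAPS_ARM_NEON
   whereas a bare ARMv5 host with no probe succeeding would get just
   VEX_ARM_ARCHLEVEL(5). */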
1695 VG_(machine_get_cache_info)(&vai);
1697 return True;
1698 }
1700 #elif defined(VGA_arm64)
1701 {
1702 va = VexArchARM64;
1703 vai.endness = VexEndnessLE;
1705 /* So far there are no variants. */
1706 vai.hwcaps = 0;
1708 VG_(machine_get_cache_info)(&vai);
1710 /* Check whether we need to use the fallback LLSC implementation.
1711 If the check fails, give up. */
1712 if (! VG_(parse_cpuinfo)())
1713 return False;
1715 /* 0 denotes 'not set'. The range of legitimate values here,
1716 once set, is 2 through 17 inclusive. */
1717 vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
1718 vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
1719 ULong ctr_el0;
1720 __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
1721 vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
1722 vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >> 0) & 0xF) + 2;
1723 VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
1724 "ctr_el0.iMinLine_szB = %d\n",
1725 1 << vai.arm64_dMinLine_lg2_szB,
1726 1 << vai.arm64_iMinLine_lg2_szB);
1727 VG_(debugLog)(1, "machine", "ARM64: requires_fallback_LLSC: %s\n",
1728 vai.arm64_requires_fallback_LLSC ? "yes" : "no");
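/* Worked example with a made-up register value: if CTR_EL0 read 0x84448004,
   then DminLine (bits 19:16) and IminLine (bits 3:0) are both 4, the lg2
   sizes computed above are 4+2 = 6, and the debug output would report
   64-byte minimum D- and I-cache lines. */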
1730 return True;
1731 }
1733 #elif defined(VGA_mips32)
1734 {
1735 /* Define the position of the F64 bit in the FIR register. */
1736 # define FP64 22
1737 va = VexArchMIPS32;
1738 if (!VG_(parse_cpuinfo)())
1739 return False;
1741 # if defined(VKI_LITTLE_ENDIAN)
1742 vai.endness = VexEndnessLE;
1743 # elif defined(VKI_BIG_ENDIAN)
1744 vai.endness = VexEndnessBE;
1745 # else
1746 vai.endness = VexEndness_INVALID;
1747 # endif
1749 /* Same instruction set detection algorithm as for ppc32/arm... */
1750 vki_sigset_t saved_set, tmp_set;
1751 vki_sigaction_fromK_t saved_sigill_act;
1752 vki_sigaction_toK_t tmp_sigill_act;
1754 volatile Bool have_DSP, have_DSPr2, have_MSA;
1755 Int r;
1757 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1759 VG_(sigemptyset)(&tmp_set);
1760 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1762 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1763 vg_assert(r == 0);
1765 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1766 vg_assert(r == 0);
1767 tmp_sigill_act = saved_sigill_act;
1769 /* NODEFER: signal handler does not return (from the kernel's point of
1770 view), hence if it is to successfully catch a signal more than once,
1771 we need the NODEFER flag. */
1772 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1773 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1774 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1775 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1776 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1778 if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {
1780 /* MSA instructions. */
1781 have_MSA = True;
1782 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1783 have_MSA = False;
1784 } else {
1785 __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
1786 }
1787 if (have_MSA) {
1788 vai.hwcaps |= VEX_PRID_IMP_P5600;
1789 } else {
1790 /* DSPr2 instructions. */
1791 have_DSPr2 = True;
1792 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1793 have_DSPr2 = False;
1794 } else {
1795 __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
1796 }
1797 if (have_DSPr2) {
1798 /* We assume it's 74K, since it can run DSPr2. */
1799 vai.hwcaps |= VEX_PRID_IMP_74K;
1800 } else {
1801 /* DSP instructions. */
1802 have_DSP = True;
1803 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1804 have_DSP = False;
1805 } else {
1806 __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
1807 }
1808 if (have_DSP) {
1809 /* We assume it's 34K, since it has support for DSP. */
1810 vai.hwcaps |= VEX_PRID_IMP_34K;
1811 }
1812 }
1813 }
1814 }
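/* Informal summary of the cascade above: MSA present -> treat the core as
   P5600-class; otherwise DSPr2 present -> 74K; otherwise DSP present -> 34K;
   if none of the probes succeed, the hwcaps obtained earlier from
   VG_(parse_cpuinfo) are left unchanged. */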
1816 # if defined(VGP_mips32_linux)
1817 Int fpmode = VG_(prctl)(VKI_PR_GET_FP_MODE, 0, 0, 0, 0);
1818 # else
1819 Int fpmode = -1;
1820 # endif
1822 if (fpmode < 0) {
1823 /* prctl(PR_GET_FP_MODE) is not supported by the kernel, so
1824 we use an alternative way to determine the FP mode. */
1825 ULong result = 0;
1827 if (!VG_MINIMAL_SETJMP(env_unsup_insn)) {
1828 __asm__ volatile (
1829 ".set push\n\t"
1830 ".set noreorder\n\t"
1831 ".set oddspreg\n\t"
1832 ".set hardfloat\n\t"
1833 "lui $t0, 0x3FF0\n\t"
1834 "ldc1 $f0, %0\n\t"
1835 "mtc1 $t0, $f1\n\t"
1836 "sdc1 $f0, %0\n\t"
1837 ".set pop\n\t"
1838 : "+m"(result)
1840 : "t0", "$f0", "$f1", "memory");
1842 fpmode = (result != 0x3FF0000000000000ull);
1843 }
1844 }
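/* Sketch of why this probe works: 'result' starts as 0.0 and is loaded into
   $f0 as a 64-bit value, then the high word of 1.0 (0x3FF00000) is written
   into $f1. With a 32-bit FPU (FR=0) $f0/$f1 form one 64-bit pair, so the
   store writes back 0x3FF0000000000000 and fpmode ends up 0; with FR=1 the
   two registers are independent, 'result' stays 0.0 and fpmode ends up 1,
   indicating 64-bit FP registers on the host. */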
1846 if (fpmode != 0)
1847 vai.hwcaps |= VEX_MIPS_HOST_FR;
1849 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
1850 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1851 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1853 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
1854 VG_(machine_get_cache_info)(&vai);
1856 return True;
1857 }
1859 #elif defined(VGA_mips64)
1860 {
1861 va = VexArchMIPS64;
1862 if (!VG_(parse_cpuinfo)())
1863 return False;
1865 # if defined(VKI_LITTLE_ENDIAN)
1866 vai.endness = VexEndnessLE;
1867 # elif defined(VKI_BIG_ENDIAN)
1868 vai.endness = VexEndnessBE;
1869 # else
1870 vai.endness = VexEndness_INVALID;
1871 # endif
1873 vai.hwcaps |= VEX_MIPS_HOST_FR;
1875 /* Same instruction set detection algorithm as for ppc32/arm... */
1876 vki_sigset_t saved_set, tmp_set;
1877 vki_sigaction_fromK_t saved_sigill_act;
1878 vki_sigaction_toK_t tmp_sigill_act;
1880 volatile Bool have_MSA;
1881 Int r;
1883 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1885 VG_(sigemptyset)(&tmp_set);
1886 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1888 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1889 vg_assert(r == 0);
1891 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1892 vg_assert(r == 0);
1893 tmp_sigill_act = saved_sigill_act;
1895 /* NODEFER: signal handler does not return (from the kernel's point of
1896 view), hence if it is to successfully catch a signal more than once,
1897 we need the NODEFER flag. */
1898 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1899 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1900 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1901 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1902 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1904 if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {
1906 /* MSA instructions */
1907 have_MSA = True;
1908 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1909 have_MSA = False;
1910 } else {
1911 __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
1912 }
1913 if (have_MSA) {
1914 vai.hwcaps |= VEX_PRID_IMP_P5600;
1915 }
1916 }
1918 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
1919 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1920 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1922 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
1924 VG_(machine_get_cache_info)(&vai);
1926 return True;
1927 }
1929 #else
1930 # error "Unknown arch"
1931 #endif
1932 }
1934 /* Notify host cpu instruction cache line size. */
1935 #if defined(VGA_ppc32)
1936 void VG_(machine_ppc32_set_clszB)( Int szB )
1937 {
1938 vg_assert(hwcaps_done);
1940 /* Either the value must not have been set yet (zero) or we can
1941 tolerate it being set to the same value multiple times, as the
1942 stack scanning logic in m_main is a bit stupid. */
1943 vg_assert(vai.ppc_icache_line_szB == 0
1944 || vai.ppc_icache_line_szB == szB);
1946 vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
1947 vai.ppc_icache_line_szB = szB;
1948 }
1949 #endif
1952 /* Notify host cpu instruction cache line size. */
1953 #if defined(VGA_ppc64be)|| defined(VGA_ppc64le)
1954 void VG_(machine_ppc64_set_clszB)( Int szB )
1955 {
1956 vg_assert(hwcaps_done);
1958 /* Either the value must not have been set yet (zero) or we can
1959 tolerate it being set to the same value multiple times, as the
1960 stack scanning logic in m_main is a bit stupid. */
1961 vg_assert(vai.ppc_icache_line_szB == 0
1962 || vai.ppc_icache_line_szB == szB);
1964 vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
1965 vai.ppc_icache_line_szB = szB;
1966 }
1967 #endif
1970 /* Notify host's ability to handle NEON instructions. */
1971 #if defined(VGA_arm)
1972 void VG_(machine_arm_set_has_NEON)( Bool has_neon )
1973 {
1974 vg_assert(hwcaps_done);
1975 /* There's nothing else we can sanity check. */
1977 if (has_neon) {
1978 vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
1979 } else {
1980 vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
1981 }
1982 }
1983 #endif
1986 /* Fetch host cpu info, once established. */
1987 void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
1988 /*OUT*/VexArchInfo* pVai )
1989 {
1990 vg_assert(hwcaps_done);
1991 if (pVa) *pVa = va;
1992 if (pVai) *pVai = vai;
1993 }
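/* Typical use (illustrative only; the local names are hypothetical):

      VexArch     host_arch;
      VexArchInfo host_info;
      VG_(machine_get_VexArchInfo)( &host_arch, &host_info );

   Either out-pointer may be NULL if only one of the two is wanted, and
   calling this before hwcaps detection (VG_(machine_get_hwcaps)) has
   completed trips the hwcaps_done assertion. */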
1996 /* Returns the size of the largest guest register that we will
1997 simulate in this run. This depends on both the guest architecture
1998 and on the specific capabilities we are simulating for that guest
1999 (e.g. AVX or non-AVX, for amd64). Should return either 4, 8, 16
2000 or 32. General rule: if in doubt, return a value larger than
2001 reality.
2003 This information is needed by Cachegrind and Callgrind to decide
2004 on the minimum cache line size they are prepared to simulate.
2005 Basically we require that the minimum cache line size is at least
2006 as large as the largest register that might get transferred to or
2007 from memory, so as to guarantee that any such transfer straddles
2008 at most 2 cache lines.
2009 */
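/* Worked example with illustrative numbers: a 32-byte register stored at
   an unaligned address touches at most two 64-byte lines, but could touch
   three 16-byte lines; requiring line size >= largest register keeps every
   such transfer within two lines. */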
2010 Int VG_(machine_get_size_of_largest_guest_register) ( void )
2011 {
2012 vg_assert(hwcaps_done);
2013 /* Once hwcaps_done is True, we can fish around inside va/vai to
2014 find the information we need. */
2016 # if defined(VGA_x86)
2017 vg_assert(va == VexArchX86);
2018 /* We don't support AVX, so 32 is out. At the other end, even if
2019 we don't support any SSE, the X87 can generate 10 byte
2020 transfers, so let's say 16 to be on the safe side. Hence the
2021 answer is always 16. */
2022 return 16;
2024 # elif defined(VGA_amd64)
2025 /* if AVX then 32 else 16 */
2026 return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;
2028 # elif defined(VGA_ppc32)
2029 /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
2030 if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
2031 if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
2032 if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
2033 return 8;
2035 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
2036 /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
2037 if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
2038 if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
2039 if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
2040 return 8;
2042 # elif defined(VGA_s390x)
2043 return 8;
2045 # elif defined(VGA_arm)
2046 /* Really it depends whether or not we have NEON, but let's just
2047 assume we always do. */
2048 return 16;
2050 # elif defined(VGA_arm64)
2051 /* ARM64 always has Neon, AFAICS. */
2052 return 16;
2054 # elif defined(VGA_mips32)
2055 /* The guest state implies 4, but that can't really be true, can
2056 it? */
2057 return 8;
2059 # elif defined(VGA_mips64)
2060 return 8;
2062 # else
2063 # error "Unknown arch"
2064 # endif
2065 }
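/* Example of how a tool might consume this value (hypothetical check,
   'line_szB' being the tool's simulated line size):

      Int reg_szB = VG_(machine_get_size_of_largest_guest_register)();
      vg_assert(line_szB >= reg_szB);

   which is exactly the guarantee the comment above asks for. */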
2068 // Given a pointer to a function as obtained by "& functionname" in C,
2069 // produce a pointer to the actual entry point for the function.
2070 void* VG_(fnptr_to_fnentry)( void* f )
2071 {
2072 # if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \
2073 || defined(VGP_arm_linux) || defined(VGO_darwin) \
2074 || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
2075 || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
2076 || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
2077 || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris)
2078 return f;
2079 # elif defined(VGP_ppc64be_linux)
2080 /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
2081 3-word function descriptor, of which the first word is the entry
2082 address. */
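/* Illustrative layout of such a descriptor (field names are ours; the
   authoritative definition is in the ELFv1 ABI):
      struct OpdEntry { UWord entry; UWord toc; UWord env; };
   so the dereference below simply picks out the 'entry' word. */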
2083 UWord* descr = (UWord*)f;
2084 return (void*)(descr[0]);
2085 # else
2086 # error "Unknown platform"
2087 # endif
2088 }
2090 /*--------------------------------------------------------------------*/
2091 /*--- end ---*/
2092 /*--------------------------------------------------------------------*/