/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2017 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcproc.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"
#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

#define STACK_PTR_S1(regs) ((regs).vex_shadow1.VG_STACK_PTR)
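
/* Accessors for the guest program counter, stack pointer and frame
   pointer of a thread, plus the shadow1 copy of the stack pointer. */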
Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

Addr VG_(get_SP_s1) ( ThreadId tid ) {
   return STACK_PTR_S1( VG_(threads)[tid].arch );
}
void VG_(set_SP_s1) ( ThreadId tid, Addr sp ) {
   STACK_PTR_S1( VG_(threads)[tid].arch ) = sp;
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}
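
/* Collect the minimal per-arch register set needed to start a stack
   unwind: always the PC and SP, plus whatever extra state the unwinder
   for this architecture wants (frame pointer, link register, etc). */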
void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_FP;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}
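
/* Copy a chunk of one of the guest register states of thread TID into
   DST.  shadowNo selects which state: 0 is the real register state,
   1 and 2 are the two shadow copies maintained on behalf of tools. */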
void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}
void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}
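
/* Call F once for each general-purpose register of thread TID, passing
   it the register's name and its current guest value. */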
static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32) || defined(VGA_mips64)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#elif defined(VGA_arm64)
   (*f)(tid, "x0" , vex->guest_X0 );
   (*f)(tid, "x1" , vex->guest_X1 );
   (*f)(tid, "x2" , vex->guest_X2 );
   (*f)(tid, "x3" , vex->guest_X3 );
   (*f)(tid, "x4" , vex->guest_X4 );
   (*f)(tid, "x5" , vex->guest_X5 );
   (*f)(tid, "x6" , vex->guest_X6 );
   (*f)(tid, "x7" , vex->guest_X7 );
   (*f)(tid, "x8" , vex->guest_X8 );
   (*f)(tid, "x9" , vex->guest_X9 );
   (*f)(tid, "x10", vex->guest_X10);
   (*f)(tid, "x11", vex->guest_X11);
   (*f)(tid, "x12", vex->guest_X12);
   (*f)(tid, "x13", vex->guest_X13);
   (*f)(tid, "x14", vex->guest_X14);
   (*f)(tid, "x15", vex->guest_X15);
   (*f)(tid, "x16", vex->guest_X16);
   (*f)(tid, "x17", vex->guest_X17);
   (*f)(tid, "x18", vex->guest_X18);
   (*f)(tid, "x19", vex->guest_X19);
   (*f)(tid, "x20", vex->guest_X20);
   (*f)(tid, "x21", vex->guest_X21);
   (*f)(tid, "x22", vex->guest_X22);
   (*f)(tid, "x23", vex->guest_X23);
   (*f)(tid, "x24", vex->guest_X24);
   (*f)(tid, "x25", vex->guest_X25);
   (*f)(tid, "x26", vex->guest_X26);
   (*f)(tid, "x27", vex->guest_X27);
   (*f)(tid, "x28", vex->guest_X28);
   (*f)(tid, "x29", vex->guest_X29);
   (*f)(tid, "x30", vex->guest_X30);
#else
#  error Unknown arch
#endif
}
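
/* Apply F to the GP registers of every thread that is either alive or
   has been told to die by another thread's call to exit(). */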
void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread or thread instructed to die by another thread that
         // called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}
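
/* Iterator over the stacks of all live threads: call
   VG_(thread_stack_reset_iter) once, then VG_(thread_stack_next)
   repeatedly until it returns False. */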
void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_byte;
         return True;
      }
   }
   return False;
}
Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_byte;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}
//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/
/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif

/* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
   testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32) \
    || defined(VGA_mips64)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
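
/* All the SIGILL-based feature probes below follow the same pattern:

      have_FOO = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_FOO = False;          // insn raised SIGILL; handler longjmp'd back
      } else {
         __asm__ __volatile__(...); // try one representative FOO instruction
      }
*/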

/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, that they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64
             || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64
                || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      const HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
      { "2828", VEX_S390X_MODEL_ZBC12 },
      { "2964", VEX_S390X_MODEL_Z13 },
      { "2965", VEX_S390X_MODEL_Z13S },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;  /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
      if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

      m = VG_(strstr)( p, "machine" );
      if (m == NULL) continue;

      p = m + sizeof "machine" - 1;
      while ( VG_(isspace)( *p ) || *p == '=') {
         if (*p == '\n') goto next_line;
         ++p;
      }

      model_name = p;
      for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
         struct model_map *mm = model_map + n;
         SizeT len = VG_(strlen)( mm->name );
         if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
              VG_(isspace)( model_name[len] )) {
            if (mm->id < model) model = mm->id;
            p = model_name + len;
            break;
         }
      }
      /* Skip until end-of-line */
      while (*p != '\n')
         ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* defined(VGA_s390x) */

#if defined(VGA_mips32) || defined(VGA_mips64)

/*
 * Initialize hwcaps by parsing /proc/cpuinfo.  Returns False if it can not
 * determine what CPU it is (it searches only for the models that are or may be
 * supported by Valgrind).
 */
static Bool VG_(parse_cpuinfo)(void)
{
   const char *search_Broadcom_str = "cpu model\t\t: Broadcom";
   const char *search_Cavium_str   = "cpu model\t\t: Cavium";
   const char *search_Ingenic_str  = "cpu model\t\t: Ingenic";
   const char *search_Loongson_str = "cpu model\t\t: ICT Loongson";
   const char *search_MIPS_str     = "cpu model\t\t: MIPS";
   const char *search_Netlogic_str = "cpu model\t\t: Netlogic";

   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf, *isa;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return False;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;  /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr)(file_buf, search_Broadcom_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_BROADCOM;
   else if (VG_(strstr)(file_buf, search_Netlogic_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_NETLOGIC;
   else if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_CAVIUM;
   else if (VG_(strstr)(file_buf, search_MIPS_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_MIPS;
   else if (VG_(strstr)(file_buf, search_Ingenic_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_INGENIC_E1;
   else if (VG_(strstr)(file_buf, search_Loongson_str) != NULL)
      vai.hwcaps = (VEX_PRID_COMP_LEGACY | VEX_PRID_IMP_LOONGSON_64);
   else {
      /* Did not find string in the proc file. */
      vai.hwcaps = 0;
      VG_(free)(file_buf);
      return False;
   }

   isa = VG_(strstr)(file_buf, "isa\t\t\t: ");

   if (NULL != isa) {
      if (VG_(strstr) (isa, "mips32r1") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
      if (VG_(strstr) (isa, "mips32r2") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
      if (VG_(strstr) (isa, "mips32r6") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R6;
      if (VG_(strstr) (isa, "mips64r1") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R1;
      if (VG_(strstr) (isa, "mips64r2") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2;
      if (VG_(strstr) (isa, "mips64r6") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R6;

      /*
       * TODO(petarj): Remove this Cavium workaround once Linux kernel folks
       * decide to change incorrect settings in
       * mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h.
       * The current settings show mips32r1, mips32r2 and mips64r1 as
       * unsupported ISAs by Cavium MIPS CPUs.
       */
      if (VEX_MIPS_COMP_ID(vai.hwcaps) == VEX_PRID_COMP_CAVIUM) {
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1 | VEX_MIPS_CPU_ISA_M32R2 |
                       VEX_MIPS_CPU_ISA_M64R1;
      }
   } else {
      /*
       * Kernel does not provide information about supported ISAs.
       * Populate the isa level flags based on the CPU model.  That is our
       * best guess.
       */
      switch (VEX_MIPS_COMP_ID(vai.hwcaps)) {
         case VEX_PRID_COMP_CAVIUM:
         case VEX_PRID_COMP_NETLOGIC:
            vai.hwcaps |= (VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1);
            /* fall through */
         case VEX_PRID_COMP_INGENIC_E1:
         case VEX_PRID_COMP_MIPS:
            vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
            /* fall through */
         case VEX_PRID_COMP_BROADCOM:
            vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
            break;
         case VEX_PRID_COMP_LEGACY:
            if (VEX_MIPS_PROC_ID(vai.hwcaps) == VEX_PRID_IMP_LOONGSON_64)
               vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1 |
                             VEX_MIPS_CPU_ISA_M32R2 | VEX_MIPS_CPU_ISA_M32R1;
            break;
         default:
            break;
      }
   }
   VG_(free)(file_buf);
   return True;
}

#endif /* defined(VGA_mips32) || defined(VGA_mips64) */

#if defined(VGP_arm64_linux)

/* Check to see whether we are running on a Cavium core, and if so auto-enable
   the fallback LLSC implementation. See #369459. */

static Bool VG_(parse_cpuinfo)(void)
{
   const char *search_Cavium_str = "CPU implementer\t: 0x43";

   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return False;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;  /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
      vai.arm64_requires_fallback_LLSC = True;

   VG_(free)(file_buf);
   return True;
}

#endif /* defined(VGP_arm64_linux) */
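
/* Determine what we're running on, leaving the results in va and vai.
   Returns False if the host CPU lacks features that Valgrind requires,
   or cannot be identified at all. */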
Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
     have_sse3 = (ecx & (1<<0))  != 0; /* True => have sse3 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do MMXEXT. */
     have_mmxext = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        /* Some older AMD processors support a sse1 subset (Integer SSE). */
        have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
     }

     /* Figure out if this is an AMD or Intel that can do LZCNT. */
     have_lzcnt = False;
     if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
          || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Intel processors don't define the mmxext extension, but since it
        is just a sse1 subset always define it when we have sse1. */
     if (have_sse1)
        have_mmxext = True;

     va = VexArchX86;
     vai.endness = VexEndnessLE;

     if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
        VG_(machine_x86_have_mxcsr) = 0;
     } else {
        vai.hwcaps = 0; /*baseline - no sse at all*/
        VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx, have_bmi, have_avx2;
     Bool have_rdtscp;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     ULong xgetbv_0 = 0;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     max_basic = eax;
     if (max_basic < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3   is ecx:9
     // sse41   is ecx:19
     // sse42   is ecx:20

     // xsave   is ecx:26
     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
        /* Processor supports AVX instructions, and the OS has enabled
           XSAVE/XGETBV, so we can query whether AVX state is enabled. */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx","rax");
        xgetbv_0 = w;
        if ((xgetbv_0 & 7) == 7) {
           /* Only say we have AVX if the XSAVE-allowable
              bitfield-mask allows x87, SSE and AVX state.  We could
              actually run with a more restrictive XGETBV(0) value,
              but VEX's implementation of XSAVE and XRSTOR assumes
              that all 3 bits are enabled.

              Also, the VEX implementation of XSAVE/XRSTOR assumes that
              state component [2] (the YMM high halves) are located in
              the XSAVE image at offsets 576 .. 831.  So we have to
              check that here before declaring AVX to be supported. */
           UInt eax2, ebx2, ecx2, edx2;
           VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2);
           if (ebx2 == 576 && eax2 == 256) {
              have_avx = True;

              /* have_fma = (ecx & (1<<12)) != 0; */
              /* have_fma: Probably correct, but gcc complains due to
                 unusedness. */
           }
        }
     }

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this CPU can do LZCNT. */
     have_lzcnt = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Can we do RDTSCP? */
     have_rdtscp = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
     }

     /* Check for BMI1 and AVX2, but only if we already have AVX1
        (plus the OS support that AVX1 implies). */
     have_bmi  = False;
     have_avx2 = False;
     if (have_avx && max_basic >= 7) {
        VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
        have_bmi  = (ebx & (1<<3)) != 0; /* True => have BMI1 */
        have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
     }

     va          = VexArchAMD64;
     vai.endness = VexEndnessLE;
     vai.hwcaps  = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
                 | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
                 | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
                 | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
                 | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
                 | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
                 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07, have_isa_3_0;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* Check for ISA 3.0 support. */
     have_isa_3_0 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_3_0 = False;
     } else {
        __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine",
                   "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
                   (Int)have_F, (Int)have_V, (Int)have_FX,
                   (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                   (Int)have_isa_2_07, (Int)have_isa_3_0);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;
     vai.endness = VexEndnessBE;

     vai.hwcaps = 0;
     if (have_F)        vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)        vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX)       vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX)       vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX)       vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP)      vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;
     if (have_isa_3_0)  vai.hwcaps |= VEX_HWCAPS_PPC32_ISA3_0;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07, have_isa_3_0;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* Check for ISA 3.0 support. */
     have_isa_3_0 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_3_0 = False;
     } else {
        __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine",
                   "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
                   (Int)have_F, (Int)have_V, (Int)have_FX,
                   (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                   (Int)have_isa_2_07, (Int)have_isa_3_0);
     /* on ppc64be, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;
#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps = 0;
     if (have_V)        vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX)       vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX)       vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX)       vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP)      vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;
     if (have_isa_3_0)  vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_0;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)

#  include "libvex_s390x_common.h"

   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_STFLE;
     Int i, r, model;

     /* If the model is "unknown" don't treat this as an error.  Assume
        this is a brand-new machine model for which we don't have the
        identification yet.  Keeping fingers crossed. */
     model = VG_(get_machine_model)();

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
        /* BASR loads the address of the next insn into r1.  Needed to avoid
           a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY 0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     /* Check availability of STFLE. If available store facility bits
        in hoststfle. */
     ULong hoststfle[S390_NUM_FACILITY_DW];

     for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
        hoststfle[i] = 0;

     have_STFLE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
        register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;

        __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                             : "=m" (hoststfle), "+d"(reg0)
                             : : "cc", "memory");
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;
     vai.endness = VexEndnessBE;

     vai.hwcaps = model;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990.  For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }

     /* Detect presence of certain facilities using the STFLE insn.
        Note, that these facilities were introduced at the same time or later
        as STFLE, so the absence of STFLE implies the absence of the facility
        we're trying to detect. */
     struct fac_hwcaps_map {
        UInt installed;
        UInt facility_bit;
        UInt hwcaps_bit;
        const HChar name[6];   // may need adjustment for new facility names
     } fac_hwcaps[] = {
        { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM"  },
        { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE"   },
        { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP"   },
        { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX"   },
        { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2"  },
        { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3"  },
        { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
        { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
        { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC"   },
        { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO"  },
        { False, S390_FAC_VX,    VEX_HWCAPS_S390X_VX,    "VX"    }
     };

     /* Set hwcaps according to the detected facilities */
     UChar dw_number = 0;
     UChar fac_bit = 0;
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        vg_assert(fac_hwcaps[i].facility_bit <= 191);  // for now
        dw_number = fac_hwcaps[i].facility_bit / 64;
        fac_bit   = fac_hwcaps[i].facility_bit % 64;
        if (hoststfle[dw_number] & (1ULL << (63 - fac_bit))) {
           fac_hwcaps[i].installed = True;
           vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
        }
     }

     /* Build up a string showing the probed-for facilities */
     HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
                   (sizeof fac_hwcaps[0].name + 3) +   //  %s %d
                   7 + 1 + 4 + 2    // machine %4d
                   + 1];            // \0
     HChar *p = fac_str;
     p += VG_(sprintf)(p, "machine %4d  ", model);
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name,
                          fac_hwcaps[i].installed);
     }
     *p++ = '\0';

     VG_(debugLog)(1, "machine", "%s\n", fac_str);
     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON, have_V8;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they differ
        very little, so for now we do not distinguish between them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     /* ARMv8 insns */
     have_V8 = True;
     if (archlevel == 7) {
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_V8 = False;
        } else {
           __asm__ __volatile__(".word 0xF3044F54"); /* VMAXNM.F32 q2,q2,q2 */
        }
        if (have_V8 && have_NEON && have_VFP3) {
           archlevel = 8;
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
                   archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
                   (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;
     vai.endness = VexEndnessLE;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm64)
   {
     va = VexArchARM64;
     vai.endness = VexEndnessLE;

     /* So far there are no variants. */
     vai.hwcaps = 0;

     VG_(machine_get_cache_info)(&vai);

     /* Check whether we need to use the fallback LLSC implementation.
        If the check fails, give up. */
     if (! VG_(parse_cpuinfo)())
        return False;

     /* 0 denotes 'not set'.  The range of legitimate values here,
        after being set, is 2 through 17 inclusive. */
     vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
     vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
     ULong ctr_el0;
     __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
     vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
     vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
     VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
                      "ctr_el0.iMinLine_szB = %d\n",
                   1 << vai.arm64_dMinLine_lg2_szB,
                   1 << vai.arm64_iMinLine_lg2_szB);
     VG_(debugLog)(1, "machine", "ARM64: requires_fallback_LLSC: %s\n",
                   vai.arm64_requires_fallback_LLSC ? "yes" : "no");

     return True;
   }

#elif defined(VGA_mips32)
   {
   /* Define the position of F64 bit in FIR register. */
#  define FP64 22
   va = VexArchMIPS32;
   if (!VG_(parse_cpuinfo)())
      return False;

#  if defined(VKI_LITTLE_ENDIAN)
   vai.endness = VexEndnessLE;
#  elif defined(VKI_BIG_ENDIAN)
   vai.endness = VexEndnessBE;
#  else
   vai.endness = VexEndness_INVALID;
#  endif

   /* Same instruction set detection algorithm as for ppc32/arm... */
   vki_sigset_t          saved_set, tmp_set;
   vki_sigaction_fromK_t saved_sigill_act;
   vki_sigaction_toK_t   tmp_sigill_act;

   volatile Bool have_DSP, have_DSPr2, have_MSA;
   Int r;

   vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

   VG_(sigemptyset)(&tmp_set);
   VG_(sigaddset)(&tmp_set, VKI_SIGILL);

   r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
   vg_assert(r == 0);

   r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
   vg_assert(r == 0);
   tmp_sigill_act = saved_sigill_act;

   /* NODEFER: signal handler does not return (from the kernel's point of
      view), hence if it is to successfully catch a signal more than once,
      we need the NODEFER flag. */
   tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
   tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
   tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
   tmp_sigill_act.ksa_handler = handler_unsup_insn;
   VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
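
   /* Probe only on cores whose PRId company field (extracted by
      VEX_MIPS_COMP_ID) identifies MIPS Technologies; each successful
      probe then selects a representative implementation below
      (P5600, 74K or 34K). */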
   if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {

      /* MSA instructions. */
      have_MSA = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_MSA = False;
      } else {
         __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
      }
      if (have_MSA) {
         vai.hwcaps |= VEX_PRID_IMP_P5600;
      } else {
         /* DSPr2 instructions. */
         have_DSPr2 = True;
         if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
            have_DSPr2 = False;
         } else {
            __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
         }
         if (have_DSPr2) {
            /* We assume it's a 74K, since it can run DSPr2. */
            vai.hwcaps |= VEX_PRID_IMP_74K;
         } else {
            /* DSP instructions. */
            have_DSP = True;
            if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
               have_DSP = False;
            } else {
               __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
            }
            if (have_DSP) {
               /* We assume it's a 34K, since it supports DSP. */
               vai.hwcaps |= VEX_PRID_IMP_34K;
            }
         }
      }
   }

#  if defined(VGP_mips32_linux)
   Int fpmode = VG_(prctl)(VKI_PR_GET_FP_MODE, 0, 0, 0, 0);
#  else
   Int fpmode = -1;
#  endif

   if (fpmode < 0) {
      /* prctl(PR_GET_FP_MODE) is not supported by the kernel, so use an
         alternative way to determine the FP mode. */
      ULong result = 0;
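
      /* How the probe works: 0x3FF00000 is the upper word of the IEEE754
         double 1.0.  With 32-bit FPRs (FR=0), $f0/$f1 form one 64-bit
         pair, so writing $f1 changes the double stored from $f0 to
         0x3FF0000000000000; with 64-bit FPRs (FR=1), $f1 is a separate
         register and the stored value remains 0. */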
      if (!VG_MINIMAL_SETJMP(env_unsup_insn)) {
         __asm__ volatile (
            ".set push\n\t"
            ".set noreorder\n\t"
            ".set oddspreg\n\t"
            ".set hardfloat\n\t"
            "lui $t0, 0x3FF0\n\t"
            "ldc1 $f0, %0\n\t"
            "mtc1 $t0, $f1\n\t"
            "sdc1 $f0, %0\n\t"
            ".set pop\n\t"
            : "+m"(result)
            :
            : "t0", "$f0", "$f1", "memory");

         fpmode = (result != 0x3FF0000000000000ull);
      }
   }

   if (fpmode != 0)
      vai.hwcaps |= VEX_MIPS_HOST_FR;

   VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
   VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

   VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
   VG_(machine_get_cache_info)(&vai);

   return True;
   }

#elif defined(VGA_mips64)
   {
   va = VexArchMIPS64;
   if (!VG_(parse_cpuinfo)())
      return False;

#  if defined(VKI_LITTLE_ENDIAN)
   vai.endness = VexEndnessLE;
#  elif defined(VKI_BIG_ENDIAN)
   vai.endness = VexEndnessBE;
#  else
   vai.endness = VexEndness_INVALID;
#  endif
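
   /* Unlike on mips32, no FP-mode probe is needed here: the mips64 ABI
      runs with 64-bit FPRs (FR=1), so the flag is set unconditionally. */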
   vai.hwcaps |= VEX_MIPS_HOST_FR;

   /* Same instruction set detection algorithm as for ppc32/arm... */
   vki_sigset_t          saved_set, tmp_set;
   vki_sigaction_fromK_t saved_sigill_act;
   vki_sigaction_toK_t   tmp_sigill_act;

   volatile Bool have_MSA;
   Int r;

   vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

   VG_(sigemptyset)(&tmp_set);
   VG_(sigaddset)(&tmp_set, VKI_SIGILL);

   r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
   vg_assert(r == 0);

   r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
   vg_assert(r == 0);
   tmp_sigill_act = saved_sigill_act;

   /* NODEFER: signal handler does not return (from the kernel's point of
      view), hence if it is to successfully catch a signal more than once,
      we need the NODEFER flag. */
   tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
   tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
   tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
   tmp_sigill_act.ksa_handler = handler_unsup_insn;
   VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

   if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {

      /* MSA instructions */
      have_MSA = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_MSA = False;
      } else {
         __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
      }
      if (have_MSA) {
         vai.hwcaps |= VEX_PRID_IMP_P5600;
      }
   }

   VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
   VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

   VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

   VG_(machine_get_cache_info)(&vai);

   return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif

/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif

/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif

/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa) *pVa = va;
   if (pVai) *pVai = vai;
}
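
/* A minimal usage sketch (illustrative only, hence compiled out): how a
   caller might fetch the established host info once VG_(machine_get_hwcaps)
   has succeeded.  The function name is hypothetical. */
#if 0
static void example_log_host_arch ( void )
{
   VexArch     ex_va;
   VexArchInfo ex_vai;
   VG_(machine_get_VexArchInfo)( &ex_va, &ex_vai );
   VG_(debugLog)(1, "machine", "host hwcaps = 0x%x, little-endian = %d\n",
                 ex_vai.hwcaps, (Int)(ex_vai.endness == VexEndnessLE));
}
#endif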

/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   what the minimum cache line size they are prepared to simulate is.
   Basically require that the minimum cache line size is at least as
   large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines.
*/
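/* Worked example: with 32-byte cache lines, a 32-byte AVX transfer
   touches at most two lines regardless of alignment, whereas with
   16-byte lines a misaligned 32-byte transfer could straddle three. */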
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V)   return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX)  return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V)   return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX)  return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_arm64)
   /* ARM64 always has Neon, AFAICS. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  elif defined(VGA_mips64)
   return 8;

#  else
#    error "Unknown arch"
#  endif
}

// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)        \
      || defined(VGP_arm_linux) || defined(VGO_darwin)          \
      || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux)  \
      || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)  \
      || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris)
   return f;
#  elif defined(VGP_ppc64be_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
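   /* (ELFv1/AIX descriptor layout, for reference: descr[0] is the entry
      address, descr[1] the TOC pointer, descr[2] the environment
      pointer; only the entry address is needed here.) */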
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/