/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2017 Julian Seward

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/
#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcproc.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"
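
/* The three macros below pick the program counter, stack pointer and
   frame pointer out of a thread's guest state; VG_INSTR_PTR,
   VG_STACK_PTR and VG_FRAME_PTR resolve to the appropriate
   architecture-specific guest register, as defined per-architecture in
   pub_core_machine.h. */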
#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

#define STACK_PTR_S1(regs) ((regs).vex_shadow1.VG_STACK_PTR)
Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

Addr VG_(get_SP_s1) ( ThreadId tid ) {
   return STACK_PTR_S1( VG_(threads)[tid].arch );
}
void VG_(set_SP_s1) ( ThreadId tid, Addr sp ) {
   STACK_PTR_S1( VG_(threads)[tid].arch ) = sp;
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}
void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_FP;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
   /* ANDREAS 3 Apr 2019 FIXME r_f0..r_f7: is this correct? */
   regs->misc.S390X.r_f0
      = VG_(threads)[tid].arch.vex.guest_v0.w64[0];
   regs->misc.S390X.r_f1
      = VG_(threads)[tid].arch.vex.guest_v1.w64[0];
   regs->misc.S390X.r_f2
      = VG_(threads)[tid].arch.vex.guest_v2.w64[0];
   regs->misc.S390X.r_f3
      = VG_(threads)[tid].arch.vex.guest_v3.w64[0];
   regs->misc.S390X.r_f4
      = VG_(threads)[tid].arch.vex.guest_v4.w64[0];
   regs->misc.S390X.r_f5
      = VG_(threads)[tid].arch.vex.guest_v5.w64[0];
   regs->misc.S390X.r_f6
      = VG_(threads)[tid].arch.vex.guest_v6.w64[0];
   regs->misc.S390X.r_f7
      = VG_(threads)[tid].arch.vex.guest_v7.w64[0];
#  elif defined(VGA_mips32) || defined(VGP_nanomips_linux)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}
void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(src != NULL);
   VG_(memcpy)( dst, src, size );
}
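
/* Illustrative usage sketch (not part of the build): a tool wanting the
   shadow1 copy of a guest register could, assuming an amd64 guest, do

      UWord sh1;
      VG_(get_shadow_regs_area)( tid, (UChar*)&sh1, 1,
                                 offsetof(VexGuestAMD64State, guest_RAX),
                                 sizeof(sh1) );

   shadowNo selects vex (0), vex_shadow1 (1) or vex_shadow2 (2), and
   offset/size name a byte range within the guest state. */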
void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(dst != NULL);
   VG_(memcpy)( dst, src, size );
}
static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, UWord))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32) || defined(VGA_mips64) || defined(VGP_nanomips_linux)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#elif defined(VGA_arm64)
   (*f)(tid, "x0" , vex->guest_X0 );
   (*f)(tid, "x1" , vex->guest_X1 );
   (*f)(tid, "x2" , vex->guest_X2 );
   (*f)(tid, "x3" , vex->guest_X3 );
   (*f)(tid, "x4" , vex->guest_X4 );
   (*f)(tid, "x5" , vex->guest_X5 );
   (*f)(tid, "x6" , vex->guest_X6 );
   (*f)(tid, "x7" , vex->guest_X7 );
   (*f)(tid, "x8" , vex->guest_X8 );
   (*f)(tid, "x9" , vex->guest_X9 );
   (*f)(tid, "x10", vex->guest_X10);
   (*f)(tid, "x11", vex->guest_X11);
   (*f)(tid, "x12", vex->guest_X12);
   (*f)(tid, "x13", vex->guest_X13);
   (*f)(tid, "x14", vex->guest_X14);
   (*f)(tid, "x15", vex->guest_X15);
   (*f)(tid, "x16", vex->guest_X16);
   (*f)(tid, "x17", vex->guest_X17);
   (*f)(tid, "x18", vex->guest_X18);
   (*f)(tid, "x19", vex->guest_X19);
   (*f)(tid, "x20", vex->guest_X20);
   (*f)(tid, "x21", vex->guest_X21);
   (*f)(tid, "x22", vex->guest_X22);
   (*f)(tid, "x23", vex->guest_X23);
   (*f)(tid, "x24", vex->guest_X24);
   (*f)(tid, "x25", vex->guest_X25);
   (*f)(tid, "x26", vex->guest_X26);
   (*f)(tid, "x27", vex->guest_X27);
   (*f)(tid, "x28", vex->guest_X28);
   (*f)(tid, "x29", vex->guest_X29);
   (*f)(tid, "x30", vex->guest_X30);
#else
#  error "Unknown arch"
#endif
}
void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread or thread instructed to die by another thread that
         // called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}
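
/* Illustrative sketch (not part of the build): a leak-checker style
   tool could scan every thread's integer registers for heap pointers
   with a callback of the required shape, e.g.

      static void scan_one_reg ( ThreadId tid, const HChar* name,
                                 UWord val )
      {
         // treat val as a potential pointer into the client's heap
      }

      VG_(apply_to_GP_regs)( scan_one_reg );
*/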
void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}
Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_byte;
         return True;
      }
   }
   return False;
}
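
/* Typical iteration over all live thread stacks using the two
   functions above:

      ThreadId tid;
      Addr stack_min, stack_max;
      VG_(thread_stack_reset_iter)(&tid);
      while ( VG_(thread_stack_next)(&tid, &stack_min, &stack_max) ) {
         // [stack_min, stack_max] covers tid's current stack
      }
*/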
Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_byte;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}
//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/
/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif
/* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
   testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32) \
    || defined(VGA_mips64) || defined(VGA_arm64)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
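
/* The probe idiom used throughout the hwcaps detection below, in
   sketch form:

      have_FEATURE = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_FEATURE = False;  // insn raised SIGILL/SIGFPE and the
                                // handler longjmp'd back here
      } else {
         __asm__ __volatile__( <candidate instruction> );
      }

   handler_unsup_insn never returns normally, so control always resumes
   at the setjmp point, with the flag telling us whether the candidate
   instruction executed or trapped. */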
/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, that they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64
             || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64
                || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */
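
/* Worked example: dcbz zeroes exactly one cache block, so if the
   0xff-poisoned buffer comes back with 32 zero bytes the block size is
   32.  On a G5/PPC970 the dcbzl variant zeroes 128 bytes, giving
   dcbzl_szB == 128. */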
#if defined(VGA_s390x)

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */
static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      const HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
      { "2828", VEX_S390X_MODEL_ZBC12 },
      { "2964", VEX_S390X_MODEL_Z13 },
      { "2965", VEX_S390X_MODEL_Z13S },
      { "3906", VEX_S390X_MODEL_Z14 },
      { "3907", VEX_S390X_MODEL_Z14_ZR1 },
      { "8561", VEX_S390X_MODEL_Z15 },
      { "8562", VEX_S390X_MODEL_Z15 },
      { "3931", VEX_S390X_MODEL_Z16 },
      { "3932", VEX_S390X_MODEL_Z16 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;  /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
      if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

      m = VG_(strstr)( p, "machine" );
      if (m == NULL) continue;

      p = m + sizeof "machine" - 1;
      while ( VG_(isspace)( *p ) || *p == '=') {
         if (*p == '\n') goto next_line;
         ++p;
      }

      model_name = p;
      for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
         struct model_map *mm = model_map + n;
         SizeT len = VG_(strlen)( mm->name );
         if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
              VG_(isspace)( model_name[len] )) {
            if (mm->id < model) model = mm->id;
            p = model_name + len;
            break;
         }
      }
      /* Skip until end-of-line */
      while (*p != '\n')
         ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* defined(VGA_s390x) */
#if defined(VGA_mips32) || defined(VGA_mips64)

/*
 * Initialize hwcaps by parsing /proc/cpuinfo.  Returns False if it cannot
 * determine what CPU it is (it searches only for the models that are or may
 * be supported by Valgrind).
 */
static Bool VG_(parse_cpuinfo)(void)
{
   const char *search_Broadcom_str = "cpu model\t\t: Broadcom";
   const char *search_Cavium_str   = "cpu model\t\t: Cavium";
   const char *search_Ingenic_str  = "cpu model\t\t: Ingenic";
   const char *search_Loongson_str = "cpu model\t\t: ICT Loongson";
   const char *search_MIPS_str     = "cpu model\t\t: MIPS";
   const char *search_Netlogic_str = "cpu model\t\t: Netlogic";

   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf, *isa;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return False;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;  /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr)(file_buf, search_Broadcom_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_BROADCOM;
   else if (VG_(strstr)(file_buf, search_Netlogic_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_NETLOGIC;
   else if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_CAVIUM;
   else if (VG_(strstr)(file_buf, search_MIPS_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_MIPS;
   else if (VG_(strstr)(file_buf, search_Ingenic_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_INGENIC_E1;
   else if (VG_(strstr)(file_buf, search_Loongson_str) != NULL)
      vai.hwcaps = (VEX_PRID_COMP_LEGACY | VEX_PRID_IMP_LOONGSON_64);
   else {
      /* Did not find string in the proc file. */
      vai.hwcaps = 0;
      VG_(free)(file_buf);
      return False;
   }

   isa = VG_(strstr)(file_buf, "isa\t\t\t: ");

   if (NULL != isa) {
      if (VG_(strstr) (isa, "mips32r1") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
      if (VG_(strstr) (isa, "mips32r2") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
      if (VG_(strstr) (isa, "mips32r6") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R6;
      if (VG_(strstr) (isa, "mips64r1") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R1;
      if (VG_(strstr) (isa, "mips64r2") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2;
      if (VG_(strstr) (isa, "mips64r6") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R6;

      /*
       * TODO(petarj): Remove this Cavium workaround once Linux kernel folks
       * decide to change incorrect settings in
       * mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h.
       * The current settings show mips32r1, mips32r2 and mips64r1 as
       * unsupported ISAs by Cavium MIPS CPUs.
       */
      if (VEX_MIPS_COMP_ID(vai.hwcaps) == VEX_PRID_COMP_CAVIUM) {
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1 | VEX_MIPS_CPU_ISA_M32R2 |
                       VEX_MIPS_CPU_ISA_M64R1;
      }
   } else {
      /*
       * Kernel does not provide information about supported ISAs.
       * Populate the isa level flags based on the CPU model. That is our
       * best guess.
       */
      switch (VEX_MIPS_COMP_ID(vai.hwcaps)) {
         case VEX_PRID_COMP_CAVIUM:
         case VEX_PRID_COMP_NETLOGIC:
            vai.hwcaps |= (VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1);
            /* fallthrough */
         case VEX_PRID_COMP_INGENIC_E1:
         case VEX_PRID_COMP_MIPS:
            vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
            /* fallthrough */
         case VEX_PRID_COMP_BROADCOM:
            vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
            break;
         case VEX_PRID_COMP_LEGACY:
            if ((VEX_MIPS_PROC_ID(vai.hwcaps) == VEX_PRID_IMP_LOONGSON_64))
               vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1 |
                             VEX_MIPS_CPU_ISA_M32R2 | VEX_MIPS_CPU_ISA_M32R1;
            break;
         default:
            break;
      }
   }
   VG_(free)(file_buf);
   return True;
}

#endif /* defined(VGA_mips32) || defined(VGA_mips64) */
#if defined(VGP_arm64_linux)

/* Check to see whether we are running on a Cavium core, and if so auto-enable
   the fallback LLSC implementation. See #369459. */

static Bool VG_(parse_cpuinfo)(void)
{
   const char *search_Cavium_str = "CPU implementer\t: 0x43";

   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return False;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;  /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
      vai.arm64_requires_fallback_LLSC = True;

   VG_(free)(file_buf);
   return True;
}

#endif /* defined(VGP_arm64_linux) */
Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
     have_sse3 = (ecx & (1<<0))  != 0; /* True => have sse3 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

#if defined(VGP_x86_freebsd)
     if (have_sse1 || have_sse2) {
        Int sc, error;
        SizeT scl = sizeof(sc);
        /* Regardless of whether cpuid says, the OS has to enable SSE first! */
        error = VG_(sysctlbyname)("hw.instruction_sse", &sc, &scl, 0, 0);
        if (error == -1 || sc != 1) {
           have_sse1 = False;
           have_sse2 = False;
           VG_(message)(Vg_UserMsg, "Warning: cpu has SSE, but the OS has not enabled it.  Disabling in valgrind!");
        }
     }
#endif

     /* Figure out if this is an AMD that can do MMXEXT. */
     have_mmxext = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        /* Some older AMD processors support a sse1 subset (Integer SSE). */
        have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
     }

     /* Figure out if this is an AMD or Intel that can do LZCNT. */
     have_lzcnt = False;
     if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
          || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Intel processors don't define the mmxext extension, but since it
        is just a sse1 subset always define it when we have sse1. */
     if (have_sse1)
        have_mmxext = True;

     va = VexArchX86;
     vai.endness = VexEndnessLE;

     if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
        VG_(machine_x86_have_mxcsr) = 0;
     } else {
        vai.hwcaps = 0; /*baseline - no sse at all*/
        VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);
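
     /* Note the hwcaps computed above form a strict ladder: MMXEXT is
        the base, and SSE1/SSE2/SSE3 are only advertised when every
        lower rung is also present (SSE3 implies SSE2 implies SSE1
        implies MMXEXT). */

     return True;
   }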
#elif defined(VGA_amd64)
   { Bool have_sse3, have_ssse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx, have_bmi, have_avx2;
     Bool have_fma3, have_fma4;
     Bool have_rdtscp, have_rdrand, have_f16c, have_rdseed;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     ULong xgetbv_0 = 0;
     HChar vstr[13];
     vstr[0] = 0;

     have_sse3 = have_ssse3 = have_cx8 = have_cx16
        = have_lzcnt = have_avx = have_bmi = have_avx2
        = have_rdtscp = have_rdrand = have_f16c = have_rdseed
        = have_fma3 = have_fma4 = False;

     eax = ebx = ecx = edx = max_basic = max_extended = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     max_basic = eax;
     if (max_basic < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3  = (ecx & (1<<0))  != 0; /* True => have sse3 insns */
     have_ssse3 = (ecx & (1<<9))  != 0; /* True => have Sup SSE3 insns */
     have_fma3  = (ecx & (1<<12)) != 0; /* True => have fma3 insns */
     // xsave   is ecx:26
     // osxsave is ecx:27
     // avx     is ecx:28
     have_f16c   = (ecx & (1<<29)) != 0; /* True => have F16C insns */
     have_rdrand = (ecx & (1<<30)) != 0; /* True => have RDRAND insns */

     have_avx = False;
     if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
        /* Processor supports AVX instructions and XGETBV is enabled
           by OS and AVX instructions are enabled by the OS. */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx","rax");
        xgetbv_0 = w;
        if ((xgetbv_0 & 7) == 7) {
           /* Only say we have AVX if the XSAVE-allowable
              bitfield-mask allows x87, SSE and AVX state.  We could
              actually run with a more restrictive XGETBV(0) value,
              but VEX's implementation of XSAVE and XRSTOR assumes
              that all 3 bits are enabled.

              Also, the VEX implementation of XSAVE/XRSTOR assumes that
              state component [2] (the YMM high halves) are located in
              the XSAVE image at offsets 576 .. 831.  So we have to
              check that here before declaring AVX to be supported. */
           UInt eax2, ebx2, ecx2, edx2;
           VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2);
           if (ebx2 == 576 && eax2 == 256) {
              have_avx = True;
           }
        }
     }

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this CPU can do LZCNT. */
     have_lzcnt = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Can we do RDTSCP? */
     have_rdtscp = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
     }

     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_fma4 = (ecx & (1<<16)) != 0; /* True => have fma4 */
     }

     /* Check for BMI1 and AVX2.  If we have AVX1 (plus OS support). */
     if (have_avx && max_basic >= 7) {
        VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
        have_bmi    = (ebx & (1<<3))  != 0; /* True => have BMI1 */
        have_avx2   = (ebx & (1<<5))  != 0; /* True => have AVX2 */
        have_rdseed = (ebx & (1<<18)) != 0; /* True => have RDSEED insns */
     }

     /* Sanity check for RDRAND and F16C.  These don't actually *need* AVX, but
        it's convenient to restrict them to the AVX case since the simulated
        CPUID we'll offer them on has AVX as a base. */
     if (!have_avx) {
        have_f16c   = False;
        have_rdrand = False;
        have_rdseed = False;
     }

     va          = VexArchAMD64;
     vai.endness = VexEndnessLE;
     vai.hwcaps  = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
                 | (have_ssse3  ? VEX_HWCAPS_AMD64_SSSE3  : 0)
                 | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
                 | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
                 | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
                 | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
                 | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
                 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0)
                 | (have_f16c   ? VEX_HWCAPS_AMD64_F16C   : 0)
                 | (have_rdrand ? VEX_HWCAPS_AMD64_RDRAND : 0)
                 | (have_rdseed ? VEX_HWCAPS_AMD64_RDSEED : 0)
                 | (have_fma3   ? VEX_HWCAPS_AMD64_FMA3   : 0)
                 | (have_fma4   ? VEX_HWCAPS_AMD64_FMA4   : 0);

     VG_(machine_get_cache_info)(&vai);
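
     /* Worked example for the XGETBV(0) test above: XCR0 bits 0, 1 and
        2 enable x87, SSE and AVX state respectively, so
        (xgetbv_0 & 7) == 7 demands all three before AVX is
        advertised. */

     return True;
   }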
#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07, have_isa_3_0;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA,FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* Check for ISA 3.0 support. */
     have_isa_3_0 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_3_0 = False;
     } else {
        __asm__ __volatile__(".long 0x7f140434":::"r20"); /* cnttzw r20,r24 */
     }

     // ISA 3.1 not supported on 32-bit systems

     // scv instruction not supported on 32-bit systems.

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07, (Int)have_isa_3_0);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;
     vai.endness = VexEndnessBE;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;
     if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA3_0;
     /* ISA 3.1 not supported on 32-bit systems.  */
     /* SCV not supported on PPC32 */

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }
#elif defined(VGA_ppc64be)|| defined(VGA_ppc64le)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07, have_isa_3_0, have_isa_3_1;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /* vor v0,v0,v0 */
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /* fsqrt f0,f0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte f0,f0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp vs0,vs0 */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xec0e8005"); /* dadd f0,f14,f16 */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd f0,r0 */
     }

     /* Check for ISA 3.0 support. */
     have_isa_3_0 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_3_0 = False;
     } else {
        __asm__ __volatile__(".long 0x7f140434":::"r20"); /* cnttzw r20,r24 */
     }

     /* Check if Host supports scv instruction.
        Note, can not use the usual method of issuing the scv instruction and
        checking if it is supported or not.  Issuing scv on a system that does
        not have scv support in the HWCAPS generates a message in dmesg,
        "Facility 'SCV' unavailable (12), exception".  It is considered bad
        form to issue an scv on systems that do not support it.

        The function VG_(machine_ppc64_set_scv_support), is called in
        initimg-linux.c to set the flag ppc_scv_supported based on the HWCAPS2
        value.  The flag ppc_scv_supported is defined in struct VexArchInfo,
        in file libvex.h.  The setting of ppc_scv_supported in VexArchInfo
        is checked in disInstr_PPC_WRK() to set the allow_scv flag. */

     /* Check for ISA 3.1 support. */
     have_isa_3_1 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_3_1 = False;
     } else {
        __asm__ __volatile__(".long 0x7f1401b6":::"r20"); /* brh r20,r24 */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d ISA3.1 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07, (Int)have_isa_3_0, (Int)have_isa_3_1);
     /* on ppc64be, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;
#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;
     if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_0;
     if (have_isa_3_1) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_1;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) and
        VG_(machine_ppc64_set_scv_support) must be called before we're
        ready to go. */
     return True;
   }
#elif defined(VGA_s390x)

#  include "libvex_s390x_common.h"

   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_STFLE;
     Int i, r, model;

     /* If the model is "unknown" don't treat this as an error. Assume
        this is a brand-new machine model for which we don't have the
        identification yet. Keeping fingers crossed. */
     model = VG_(get_machine_model)();

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
        /* BASR loads the address of the next insn into r1. Needed to avoid
           a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     /* Check availability of STFLE. If available store facility bits
        in hoststfle. */
     ULong hoststfle[S390_NUM_FACILITY_DW];

     for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
        hoststfle[i] = 0;

     have_STFLE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
        register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;

        __asm__(".insn s,0xb2b00000,%0" /* stfle */
                : "=Q"(hoststfle), "+d"(reg0)
                : : "cc", "memory");
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;
     vai.endness = VexEndnessBE;

     vai.hwcaps = model;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }

     /* Detect presence of certain facilities using the STFLE insn.
        Note, that these facilities were introduced at the same time or later
        as STFLE, so the absence of STFLE implies the absence of the facility
        we're trying to detect. */
     struct fac_hwcaps_map {
        Bool installed;
        UInt facility_bit;
        UInt hwcaps_bit;
        const HChar name[6];   // may need adjustment for new facility names
     } fac_hwcaps[] = {
        { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM"  },
        { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE"   },
        { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP"   },
        { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX"   },
        { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2"  },
        { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3"  },
        { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
        { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
        { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC"   },
        { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO"  },
        { False, S390_FAC_VX,    VEX_HWCAPS_S390X_VX,    "VX"    },
        { False, S390_FAC_MSA5,  VEX_HWCAPS_S390X_MSA5,  "MSA5"  },
        { False, S390_FAC_MI2,   VEX_HWCAPS_S390X_MI2,   "MI2"   },
        { False, S390_FAC_LSC2,  VEX_HWCAPS_S390X_LSC2,  "LSC2"  },
        { False, S390_FAC_VXE,   VEX_HWCAPS_S390X_VXE,   "VXE"   },
        { False, S390_FAC_DFLT,  VEX_HWCAPS_S390X_DFLT,  "DFLT"  },
        { False, S390_FAC_NNPA,  VEX_HWCAPS_S390X_NNPA,  "NNPA"  },
     };

     /* Set hwcaps according to the detected facilities */
     UChar dw_number = 0;
     UChar fac_bit = 0;
     for (i = 0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        vg_assert(fac_hwcaps[i].facility_bit <= 191);  // for now
        dw_number = fac_hwcaps[i].facility_bit / 64;
        fac_bit = fac_hwcaps[i].facility_bit % 64;
        if (hoststfle[dw_number] & (1ULL << (63 - fac_bit))) {
           fac_hwcaps[i].installed = True;
           vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
        }
     }

     /* Build up a string showing the probed-for facilities */
     HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
                   (sizeof fac_hwcaps[0].name + 3) +  //  %s %d
                   7 + 1 + 4 + 2    // machine %4d
                   + 1];            // \0
     HChar *p = fac_str;
     p += VG_(sprintf)(p, "machine %4d  ", model);
     for (i = 0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name,
                          fac_hwcaps[i].installed);
     }
     *p = '\0';

     VG_(debugLog)(1, "machine", "%s\n", fac_str);
     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);
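
     /* Worked example for the STFLE decoding above: a facility bit of,
        say, 129 lives in hoststfle doubleword 129/64 = 2, and because
        STFLE numbers bits from the most significant end, the test mask
        is 1ULL << (63 - (129 % 64)) = 1ULL << 62. */

     return True;
   }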
#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON, have_V8;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension but they differ
        very little, so for now we will not distinguish them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     /* ARMv8 insns */
     have_V8 = True;
     if (archlevel == 7) {
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_V8 = False;
        } else {
           __asm__ __volatile__(".word 0xF3044F54"); /* VMAXNM.F32 q2,q2,q2 */
        }
        if (have_V8 && have_NEON && have_VFP3) {
           archlevel = 8;
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
           archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
           (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;
     vai.endness = VexEndnessLE;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }
1763 #elif defined(VGA_arm64)
1765 /* Use the attribute and feature registers to determine host hardware
1766 * capabilities. Only user-space features are read. Naming conventions
1767 * follow the Arm Architecture Reference Manual.
1769 * ID_AA64ISAR0_EL1 Instruction Set Attribute Register 0
1771 * ...5544 4444 4444 3333 3333 3332 2222 2222 1111 1111 11
1772 * ...1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
1773 * FHM DP SM4 SM3 SHA3 RDM ATOMICS
1775 * ID_AA64ISAR1_EL1 Instruction Set Attribute Register 1
1777 * ...5555 5544 4444 4444 3333 3333 3332 2222 2222 1111 1111 11
1778 * ...5432 1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
1781 * ID_AA64PFR0_EL1 Processor Feature Register 0
1783 * 6666...2222 2222 1111 1111 11
1784 * 3210...7654 3210 9876 5432 1098 7654 3210

     Bool is_base_v8 = False;

     Bool have_fhm, have_dp, have_sm4, have_sm3, have_sha3, have_rdm;
     Bool have_atomics, have_i8mm, have_bf16, have_dpbcvap, have_dpbcvadp;
     Bool have_vfp16, have_fp16;

     have_fhm = have_dp = have_sm4 = have_sm3 = have_sha3 = have_rdm
              = have_atomics = have_i8mm = have_bf16 = have_dpbcvap
              = have_dpbcvadp = have_vfp16 = have_fp16 = False;

     /* Some baseline v8.0 kernels do not allow reads of these registers. Use
      * the same SIGILL handling algorithm as other architectures for such
      * kernels.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t   tmp_sigill_act;

     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     Int r;

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Does reading ID_AA64ISAR0_EL1 register throw SIGILL on base v8.0? */
     if (VG_MINIMAL_SETJMP(env_unsup_insn))
        is_base_v8 = True;
     else
        __asm__ __volatile__("mrs x0, ID_AA64ISAR0_EL1");

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     va = VexArchARM64;
     vai.endness = VexEndnessLE;

     /* Baseline features are v8.0. */
     vai.hwcaps = 0;

     VG_(machine_get_cache_info)(&vai);

     // @todo PJF ARM64 if we need this then we can't parse anything in /proc
#if !defined(VGP_arm64_freebsd)
     /* Check whether we need to use the fallback LLSC implementation.
        If the check fails, give up. */
     if (! VG_(parse_cpuinfo)())
        return False;
#endif

     /* 0 denotes 'not set'.  The range of legitimate values here,
        after being set that is, is 2 through 17 inclusive. */
     vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
     vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
     ULong ctr_el0;
     __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
     vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
     vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
     VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
                      "ctr_el0.iMinLine_szB = %d\n",
                   1 << vai.arm64_dMinLine_lg2_szB,
                   1 << vai.arm64_iMinLine_lg2_szB);
     VG_(debugLog)(1, "machine", "ARM64: requires_fallback_LLSC: %s\n",
                   vai.arm64_requires_fallback_LLSC ? "yes" : "no");

     if (is_base_v8)
        return True;

     /* ID_AA64ISAR0_EL1 Instruction set attribute register 0 fields */
#define ID_AA64ISAR0_FHM_SHIFT     48
#define ID_AA64ISAR0_DP_SHIFT      44
#define ID_AA64ISAR0_SM4_SHIFT     40
#define ID_AA64ISAR0_SM3_SHIFT     36
#define ID_AA64ISAR0_SHA3_SHIFT    32
#define ID_AA64ISAR0_RDM_SHIFT     28
#define ID_AA64ISAR0_ATOMICS_SHIFT 20

#define ID_AA64ISAR0_FHM_SUPPORTED     0x1
#define ID_AA64ISAR0_DP_SUPPORTED      0x1
#define ID_AA64ISAR0_SM4_SUPPORTED     0x1
#define ID_AA64ISAR0_SM3_SUPPORTED     0x1
#define ID_AA64ISAR0_SHA3_SUPPORTED    0x1
#define ID_AA64ISAR0_RDM_SUPPORTED     0x1
#define ID_AA64ISAR0_ATOMICS_SUPPORTED 0x2

     /* ID_AA64ISAR1_EL1 Instruction set attribute register 1 fields */
#define ID_AA64ISAR1_I8MM_SHIFT 52
#define ID_AA64ISAR1_BF16_SHIFT 44
#define ID_AA64ISAR1_DPB_SHIFT  0

#define ID_AA64ISAR1_I8MM_SUPPORTED     0x1
#define ID_AA64ISAR1_BF16_SUPPORTED     0x1
#define ID_AA64ISAR1_DPBCVAP_SUPPORTED  0x1
#define ID_AA64ISAR1_DPBCVADP_SUPPORTED 0x2

     /* ID_AA64PFR0_EL1 Processor feature register 0 fields */
#define ID_AA64PFR0_VFP16_SHIFT 20
#define ID_AA64PFR0_FP16_SHIFT  16

#define ID_AA64PFR0_VFP16_SUPPORTED 0x1
#define ID_AA64PFR0_FP16_SUPPORTED  0x1

#define get_cpu_ftr(id) ({ \
      unsigned long val; \
      asm("mrs %0, "#id : "=r" (val)); \
      VG_(debugLog)(1, "machine", "ARM64: %-20s: 0x%016lx\n", #id, val); \
   })
     get_cpu_ftr(ID_AA64ISAR0_EL1);
     get_cpu_ftr(ID_AA64ISAR1_EL1);
     get_cpu_ftr(ID_AA64PFR0_EL1);

#define get_ftr(id, ftr, fval, have_ftr) ({ \
      unsigned long rval; \
      asm("mrs %0, "#id : "=r" (rval)); \
      have_ftr = (fval & ((rval >> ftr) & 0xf)) >= fval ? True : False; \
   })
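
     /* get_ftr reads the named register, extracts the 4-bit field at bit
        offset 'ftr', and sets have_ftr when the field ANDed with the
        required value 'fval' is still >= 'fval', i.e. when all bits of
        'fval' are present in the field.  For example, for the v8.1
        atomics (fval 0x2), a register whose ATOMICS field reads 0x2
        gives (0x2 & 0x2) >= 0x2, so have_atomics becomes True. */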

     /* Read ID_AA64ISAR0_EL1 attributes */

     /* FHM indicates support for FMLAL and FMLSL instructions.
      * Optional for v8.2.
      */
     get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT,
             ID_AA64ISAR0_FHM_SUPPORTED, have_fhm);

     /* DP indicates support for UDOT and SDOT instructions.
      * Optional for v8.2.
      */
     get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT,
             ID_AA64ISAR0_DP_SUPPORTED, have_dp);

     /* SM4 indicates support for SM4E and SM4EKEY instructions.
      * Optional for v8.2.
      */
     get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT,
             ID_AA64ISAR0_SM4_SUPPORTED, have_sm4);

     /* SM3 indicates support for SM3SS1, SM3TT1A, SM3TT1B, SM3TT2A, SM3TT2B,
      * SM3PARTW1, and SM3PARTW2 instructions.
      * Optional for v8.2.
      */
     get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT,
             ID_AA64ISAR0_SM3_SUPPORTED, have_sm3);

     /* SHA3 indicates support for EOR3, RAX1, XAR, and BCAX instructions.
      * Optional for v8.2.
      */
     get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT,
             ID_AA64ISAR0_SHA3_SUPPORTED, have_sha3);

     /* RDM indicates support for SQRDMLAH and SQRDMLSH instructions.
      * Mandatory from v8.1 onwards.
      */
     get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT,
             ID_AA64ISAR0_RDM_SUPPORTED, have_rdm);

     /* v8.1 ATOMICS indicates support for LDADD, LDCLR, LDEOR, LDSET, LDSMAX,
      * LDSMIN, LDUMAX, LDUMIN, CAS, CASP, and SWP instructions.
      * Mandatory from v8.1 onwards.
      */
     get_ftr(ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT,
             ID_AA64ISAR0_ATOMICS_SUPPORTED, have_atomics);

     /* Read ID_AA64ISAR1_EL1 attributes */

     /* I8MM indicates support for SMMLA, SUDOT, UMMLA, USMMLA, and USDOT
      * instructions.
      * Optional for v8.2.
      */
     get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT,
             ID_AA64ISAR1_I8MM_SUPPORTED, have_i8mm);

     /* BF16 indicates support for BFDOT, BFMLAL, BFMLAL2, BFMMLA, BFCVT, and
      * BFCVT2 instructions.
      * Optional for v8.2.
      */
     get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT,
             ID_AA64ISAR1_BF16_SUPPORTED, have_bf16);

     /* DPB indicates support for the DC CVAP instruction.
      * Mandatory for v8.2 onwards.
      */
     get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT,
             ID_AA64ISAR1_DPBCVAP_SUPPORTED, have_dpbcvap);

     /* DPB indicates support for the DC CVADP instruction.
      * Optional for v8.2.
      */
     get_ftr(ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT,
             ID_AA64ISAR1_DPBCVADP_SUPPORTED, have_dpbcvadp);

     /* Read ID_AA64PFR0_EL1 attributes */

     /* VFP16 indicates support for half-precision vector arithmetic.
      * Optional for v8.2. Must be the same value as FP16.
      */
     get_ftr(ID_AA64PFR0_EL1, ID_AA64PFR0_VFP16_SHIFT,
             ID_AA64PFR0_VFP16_SUPPORTED, have_vfp16);

     /* FP16 indicates support for half-precision scalar arithmetic.
      * Optional for v8.2. Must be the same value as VFP16.
      */
     get_ftr(ID_AA64PFR0_EL1, ID_AA64PFR0_FP16_SHIFT,
             ID_AA64PFR0_FP16_SUPPORTED, have_fp16);

     if (have_fhm)      vai.hwcaps |= VEX_HWCAPS_ARM64_FHM;
     if (have_dpbcvap)  vai.hwcaps |= VEX_HWCAPS_ARM64_DPBCVAP;
     if (have_dpbcvadp) vai.hwcaps |= VEX_HWCAPS_ARM64_DPBCVADP;
     if (have_sm3)      vai.hwcaps |= VEX_HWCAPS_ARM64_SM3;
     if (have_sm4)      vai.hwcaps |= VEX_HWCAPS_ARM64_SM4;
     if (have_sha3)     vai.hwcaps |= VEX_HWCAPS_ARM64_SHA3;
     if (have_rdm)      vai.hwcaps |= VEX_HWCAPS_ARM64_RDM;
     if (have_i8mm)     vai.hwcaps |= VEX_HWCAPS_ARM64_I8MM;
     if (have_atomics)  vai.hwcaps |= VEX_HWCAPS_ARM64_ATOMICS;
     if (have_bf16)     vai.hwcaps |= VEX_HWCAPS_ARM64_BF16;
     if (have_fp16)     vai.hwcaps |= VEX_HWCAPS_ARM64_FP16;
     if (have_vfp16)    vai.hwcaps |= VEX_HWCAPS_ARM64_VFP16;

     return True;
   }
#elif defined(VGA_mips32)
   {
     /* Define the position of F64 bit in FIR register. */
#    define FP64 22

     va = VexArchMIPS32;
     if (!VG_(parse_cpuinfo)())
        return False;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     /* Same instruction set detection algorithm as for ppc32/arm... */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t   tmp_sigill_act;

     volatile Bool have_DSP, have_DSPr2, have_MSA;
     Int r;

     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {

        /* MSA instructions. */
        have_MSA = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_MSA = False;
        } else {
           __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
        }
        if (have_MSA) {
           vai.hwcaps |= VEX_PRID_IMP_P5600;
        } else {
           /* DSPr2 instructions. */
           have_DSPr2 = True;
           if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
              have_DSPr2 = False;
           } else {
              __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
           }
           if (have_DSPr2) {
              /* We assume it's 74K, since it can run DSPr2. */
              vai.hwcaps |= VEX_PRID_IMP_74K;
           } else {
              /* DSP instructions. */
              have_DSP = True;
              if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
                 have_DSP = False;
              } else {
                 __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
              }
              if (have_DSP) {
                 /* We assume it's 34K, since it has support for DSP. */
                 vai.hwcaps |= VEX_PRID_IMP_34K;
              }
           }
        }
     }

#    if defined(VGP_mips32_linux)
     Int fpmode = VG_(prctl)(VKI_PR_GET_FP_MODE, 0, 0, 0, 0);
#    else
     Int fpmode = -1;
#    endif

     if (fpmode < 0) {
        /* prctl(PR_GET_FP_MODE) is not supported by the kernel; use an
           alternative way to determine the FP mode. */
        ULong result = 0;

        if (!VG_MINIMAL_SETJMP(env_unsup_insn)) {
           __asm__ __volatile__(
              ".set push\n\t"
              ".set noreorder\n\t"
              ".set oddspreg\n\t"
              ".set hardfloat\n\t"
              "lui $t0, 0x3FF0\n\t"
              "ldc1 $f0, %0\n\t"
              "mtc1 $t0, $f1\n\t"
              "sdc1 $f0, %0\n\t"
              ".set pop\n\t"
              : "+m"(result)
              :
              : "t0", "$f0", "$f1", "memory");

           fpmode = (result != 0x3FF0000000000000ull);
        }
     }

     if (fpmode != 0)
        vai.hwcaps |= VEX_MIPS_HOST_FR;
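
     /* Why the store-back probe works (explanatory note): lui puts
        0x3FF00000, the upper word of the IEEE754 double 1.0, into $t0.
        In FR=0 mode $f0/$f1 alias one 64-bit register, so writing $f1
        changes the upper word of the value stored back from $f0 and
        'result' becomes 0x3FF0000000000000, giving fpmode 0.  In FR=1
        mode $f1 is an independent register, 'result' stays 0 and fpmode
        ends up as 1, so VEX_MIPS_HOST_FR gets set. */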

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_mips64)
   {
     va = VexArchMIPS64;
     if (!VG_(parse_cpuinfo)())
        return False;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps |= VEX_MIPS_HOST_FR;

     /* Same instruction set detection algorithm as for ppc32/arm... */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t   tmp_sigill_act;

     volatile Bool have_MSA;
     Int r;

     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {

        /* MSA instructions. */
        have_MSA = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_MSA = False;
        } else {
           __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
        }
        if (have_MSA) {
           vai.hwcaps |= VEX_PRID_IMP_P5600;
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }
#elif defined(VGP_nanomips_linux)
   {
     va = VexArchNANOMIPS;
     vai.hwcaps = 0;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
     VG_(machine_get_cache_info)(&vai);

     return True;
   }
#else
#  error "Unknown arch"
#endif
}


/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}

void VG_(machine_ppc64_set_scv_support)( Int is_supported )
{
   vg_assert(hwcaps_done);
   vai.ppc_scv_supported = is_supported;
}

#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa) *pVa = va;
   if (pVai) *pVai = vai;
}


/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (e.g. AVX or non-AVX, for amd64).  It should return either 4, 8,
   16 or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   what the minimum cache line size they are prepared to simulate is.
   Basically require that the minimum cache line size is at least as
   large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines. */
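
/* Worked example: with 32-byte cache lines and a 32-byte largest register,
   a maximally misaligned 32-byte access starting 1 byte into a line ends
   30 bytes into the next one, touching exactly 2 lines.  With 16-byte
   lines the same access could span 3 lines, which the cache simulators do
   not allow for. */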
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V)   return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX)  return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V)   return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX)  return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_arm64)
   /* ARM64 always has Neon, AFAICS. */
   return 16;

#  elif defined(VGA_mips32) || defined(VGP_nanomips_linux)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  elif defined(VGA_mips64)
   return 8;

#  else
#    error "Unknown arch"
#  endif
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \
      || defined(VGP_arm_linux) || defined(VGO_darwin) || defined(VGO_freebsd) \
      || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
      || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
      || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris) \
      || defined(VGP_nanomips_linux)
   return f;
#  elif defined(VGP_ppc64be_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}
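
/* Illustrative note on the descriptor scheme (names here are invented for
   exposition and do not appear in the sources): on ppc64be-linux "&fn"
   points not at code but at a structure shaped roughly like

      typedef struct {
         void* entry;  // address of the function's first instruction
         void* toc;    // TOC (r2) value the callee expects
         void* env;    // environment pointer, unused by C code
      } FnDescr;

   so VG_(fnptr_to_fnentry) just returns descr[0], the entry address. */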

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/