1 /*--------------------------------------------------------------------*/
2 /*--- Machine-related stuff. m_machine.c ---*/
3 /*--------------------------------------------------------------------*/
6 This file is part of Valgrind, a dynamic binary instrumentation
9 Copyright (C) 2000-2017 Julian Seward
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, see <http://www.gnu.org/licenses/>.
25 The GNU General Public License is contained in the file COPYING.
28 #include "pub_core_basics.h"
29 #include "pub_core_vki.h"
30 #include "pub_core_threadstate.h"
31 #include "pub_core_libcassert.h"
32 #include "pub_core_libcbase.h"
33 #include "pub_core_libcfile.h"
34 #include "pub_core_libcprint.h"
35 #include "pub_core_libcproc.h"
36 #include "pub_core_mallocfree.h"
37 #include "pub_core_machine.h"
38 #include "pub_core_cpuid.h"
39 #include "pub_core_libcsignal.h" // for ppc32 messing with SIGILL and SIGFPE
40 #include "pub_core_debuglog.h"
43 #define INSTR_PTR(regs) ((regs).vex.VG_INSTR_PTR)
44 #define STACK_PTR(regs) ((regs).vex.VG_STACK_PTR)
45 #define FRAME_PTR(regs) ((regs).vex.VG_FRAME_PTR)
47 #define STACK_PTR_S1(regs) ((regs).vex_shadow1.VG_STACK_PTR)
49 Addr
VG_(get_IP
) ( ThreadId tid
) {
50 return INSTR_PTR( VG_(threads
)[tid
].arch
);
52 Addr
VG_(get_SP
) ( ThreadId tid
) {
53 return STACK_PTR( VG_(threads
)[tid
].arch
);
55 Addr
VG_(get_FP
) ( ThreadId tid
) {
56 return FRAME_PTR( VG_(threads
)[tid
].arch
);
59 Addr
VG_(get_SP_s1
) ( ThreadId tid
) {
60 return STACK_PTR_S1( VG_(threads
)[tid
].arch
);
62 void VG_(set_SP_s1
) ( ThreadId tid
, Addr sp
) {
63 STACK_PTR_S1( VG_(threads
)[tid
].arch
) = sp
;
66 void VG_(set_IP
) ( ThreadId tid
, Addr ip
) {
67 INSTR_PTR( VG_(threads
)[tid
].arch
) = ip
;
69 void VG_(set_SP
) ( ThreadId tid
, Addr sp
) {
70 STACK_PTR( VG_(threads
)[tid
].arch
) = sp
;
73 void VG_(get_UnwindStartRegs
) ( /*OUT*/UnwindStartRegs
* regs
,
77 regs
->r_pc
= (ULong
)VG_(threads
)[tid
].arch
.vex
.guest_EIP
;
78 regs
->r_sp
= (ULong
)VG_(threads
)[tid
].arch
.vex
.guest_ESP
;
80 = VG_(threads
)[tid
].arch
.vex
.guest_EBP
;
81 # elif defined(VGA_amd64)
82 regs
->r_pc
= VG_(threads
)[tid
].arch
.vex
.guest_RIP
;
83 regs
->r_sp
= VG_(threads
)[tid
].arch
.vex
.guest_RSP
;
84 regs
->misc
.AMD64
.r_rbp
85 = VG_(threads
)[tid
].arch
.vex
.guest_RBP
;
86 # elif defined(VGA_ppc32)
87 regs
->r_pc
= (ULong
)VG_(threads
)[tid
].arch
.vex
.guest_CIA
;
88 regs
->r_sp
= (ULong
)VG_(threads
)[tid
].arch
.vex
.guest_GPR1
;
90 = VG_(threads
)[tid
].arch
.vex
.guest_LR
;
91 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
92 regs
->r_pc
= VG_(threads
)[tid
].arch
.vex
.guest_CIA
;
93 regs
->r_sp
= VG_(threads
)[tid
].arch
.vex
.guest_GPR1
;
95 = VG_(threads
)[tid
].arch
.vex
.guest_LR
;
96 # elif defined(VGA_arm)
97 regs
->r_pc
= (ULong
)VG_(threads
)[tid
].arch
.vex
.guest_R15T
;
98 regs
->r_sp
= (ULong
)VG_(threads
)[tid
].arch
.vex
.guest_R13
;
100 = VG_(threads
)[tid
].arch
.vex
.guest_R14
;
102 = VG_(threads
)[tid
].arch
.vex
.guest_R12
;
104 = VG_(threads
)[tid
].arch
.vex
.guest_R11
;
106 = VG_(threads
)[tid
].arch
.vex
.guest_R7
;
107 # elif defined(VGA_arm64)
108 regs
->r_pc
= VG_(threads
)[tid
].arch
.vex
.guest_PC
;
109 regs
->r_sp
= VG_(threads
)[tid
].arch
.vex
.guest_XSP
;
110 regs
->misc
.ARM64
.x29
= VG_(threads
)[tid
].arch
.vex
.guest_X29
;
111 regs
->misc
.ARM64
.x30
= VG_(threads
)[tid
].arch
.vex
.guest_X30
;
112 # elif defined(VGA_s390x)
113 regs
->r_pc
= (ULong
)VG_(threads
)[tid
].arch
.vex
.guest_IA
;
114 regs
->r_sp
= (ULong
)VG_(threads
)[tid
].arch
.vex
.guest_SP
;
115 regs
->misc
.S390X
.r_fp
116 = VG_(threads
)[tid
].arch
.vex
.guest_FP
;
117 regs
->misc
.S390X
.r_lr
118 = VG_(threads
)[tid
].arch
.vex
.guest_LR
;
119 /* ANDREAS 3 Apr 2019 FIXME r_f0..r_f7: is this correct? */
120 regs
->misc
.S390X
.r_f0
121 = VG_(threads
)[tid
].arch
.vex
.guest_v0
.w64
[0];
122 regs
->misc
.S390X
.r_f1
123 = VG_(threads
)[tid
].arch
.vex
.guest_v1
.w64
[0];
124 regs
->misc
.S390X
.r_f2
125 = VG_(threads
)[tid
].arch
.vex
.guest_v2
.w64
[0];
126 regs
->misc
.S390X
.r_f3
127 = VG_(threads
)[tid
].arch
.vex
.guest_v3
.w64
[0];
128 regs
->misc
.S390X
.r_f4
129 = VG_(threads
)[tid
].arch
.vex
.guest_v4
.w64
[0];
130 regs
->misc
.S390X
.r_f5
131 = VG_(threads
)[tid
].arch
.vex
.guest_v5
.w64
[0];
132 regs
->misc
.S390X
.r_f6
133 = VG_(threads
)[tid
].arch
.vex
.guest_v6
.w64
[0];
134 regs
->misc
.S390X
.r_f7
135 = VG_(threads
)[tid
].arch
.vex
.guest_v7
.w64
[0];
136 # elif defined(VGA_mips32) || defined(VGP_nanomips_linux)
137 regs
->r_pc
= VG_(threads
)[tid
].arch
.vex
.guest_PC
;
138 regs
->r_sp
= VG_(threads
)[tid
].arch
.vex
.guest_r29
;
139 regs
->misc
.MIPS32
.r30
140 = VG_(threads
)[tid
].arch
.vex
.guest_r30
;
141 regs
->misc
.MIPS32
.r31
142 = VG_(threads
)[tid
].arch
.vex
.guest_r31
;
143 regs
->misc
.MIPS32
.r28
144 = VG_(threads
)[tid
].arch
.vex
.guest_r28
;
145 # elif defined(VGA_mips64)
146 regs
->r_pc
= VG_(threads
)[tid
].arch
.vex
.guest_PC
;
147 regs
->r_sp
= VG_(threads
)[tid
].arch
.vex
.guest_r29
;
148 regs
->misc
.MIPS64
.r30
149 = VG_(threads
)[tid
].arch
.vex
.guest_r30
;
150 regs
->misc
.MIPS64
.r31
151 = VG_(threads
)[tid
].arch
.vex
.guest_r31
;
152 regs
->misc
.MIPS64
.r28
153 = VG_(threads
)[tid
].arch
.vex
.guest_r28
;
155 # error "Unknown arch"
160 VG_(get_shadow_regs_area
) ( ThreadId tid
,
162 /*SRC*/Int shadowNo
, PtrdiffT offset
, SizeT size
)
166 vg_assert(shadowNo
== 0 || shadowNo
== 1 || shadowNo
== 2);
167 vg_assert(VG_(is_valid_tid
)(tid
));
169 vg_assert(0 <= offset
&& offset
< sizeof(VexGuestArchState
));
170 vg_assert(offset
+ size
<= sizeof(VexGuestArchState
));
172 tst
= & VG_(threads
)[tid
];
175 case 0: src
= (void*)(((Addr
)&(tst
->arch
.vex
)) + offset
); break;
176 case 1: src
= (void*)(((Addr
)&(tst
->arch
.vex_shadow1
)) + offset
); break;
177 case 2: src
= (void*)(((Addr
)&(tst
->arch
.vex_shadow2
)) + offset
); break;
179 vg_assert(src
!= NULL
);
180 VG_(memcpy
)( dst
, src
, size
);
184 VG_(set_shadow_regs_area
) ( ThreadId tid
,
185 /*DST*/Int shadowNo
, PtrdiffT offset
, SizeT size
,
186 /*SRC*/const UChar
* src
)
190 vg_assert(shadowNo
== 0 || shadowNo
== 1 || shadowNo
== 2);
191 vg_assert(VG_(is_valid_tid
)(tid
));
193 vg_assert(0 <= offset
&& offset
< sizeof(VexGuestArchState
));
194 vg_assert(offset
+ size
<= sizeof(VexGuestArchState
));
196 tst
= & VG_(threads
)[tid
];
199 case 0: dst
= (void*)(((Addr
)&(tst
->arch
.vex
)) + offset
); break;
200 case 1: dst
= (void*)(((Addr
)&(tst
->arch
.vex_shadow1
)) + offset
); break;
201 case 2: dst
= (void*)(((Addr
)&(tst
->arch
.vex_shadow2
)) + offset
); break;
203 vg_assert(dst
!= NULL
);
204 VG_(memcpy
)( dst
, src
, size
);
208 static void apply_to_GPs_of_tid(ThreadId tid
, void (*f
)(ThreadId
,
211 VexGuestArchState
* vex
= &(VG_(get_ThreadState
)(tid
)->arch
.vex
);
212 VG_(debugLog
)(2, "machine", "apply_to_GPs_of_tid %u\n", tid
);
214 (*f
)(tid
, "EAX", vex
->guest_EAX
);
215 (*f
)(tid
, "ECX", vex
->guest_ECX
);
216 (*f
)(tid
, "EDX", vex
->guest_EDX
);
217 (*f
)(tid
, "EBX", vex
->guest_EBX
);
218 (*f
)(tid
, "ESI", vex
->guest_ESI
);
219 (*f
)(tid
, "EDI", vex
->guest_EDI
);
220 (*f
)(tid
, "ESP", vex
->guest_ESP
);
221 (*f
)(tid
, "EBP", vex
->guest_EBP
);
222 #elif defined(VGA_amd64)
223 (*f
)(tid
, "RAX", vex
->guest_RAX
);
224 (*f
)(tid
, "RCX", vex
->guest_RCX
);
225 (*f
)(tid
, "RDX", vex
->guest_RDX
);
226 (*f
)(tid
, "RBX", vex
->guest_RBX
);
227 (*f
)(tid
, "RSI", vex
->guest_RSI
);
228 (*f
)(tid
, "RDI", vex
->guest_RDI
);
229 (*f
)(tid
, "RSP", vex
->guest_RSP
);
230 (*f
)(tid
, "RBP", vex
->guest_RBP
);
231 (*f
)(tid
, "R8" , vex
->guest_R8
);
232 (*f
)(tid
, "R9" , vex
->guest_R9
);
233 (*f
)(tid
, "R10", vex
->guest_R10
);
234 (*f
)(tid
, "R11", vex
->guest_R11
);
235 (*f
)(tid
, "R12", vex
->guest_R12
);
236 (*f
)(tid
, "R13", vex
->guest_R13
);
237 (*f
)(tid
, "R14", vex
->guest_R14
);
238 (*f
)(tid
, "R15", vex
->guest_R15
);
239 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
240 (*f
)(tid
, "GPR0" , vex
->guest_GPR0
);
241 (*f
)(tid
, "GPR1" , vex
->guest_GPR1
);
242 (*f
)(tid
, "GPR2" , vex
->guest_GPR2
);
243 (*f
)(tid
, "GPR3" , vex
->guest_GPR3
);
244 (*f
)(tid
, "GPR4" , vex
->guest_GPR4
);
245 (*f
)(tid
, "GPR5" , vex
->guest_GPR5
);
246 (*f
)(tid
, "GPR6" , vex
->guest_GPR6
);
247 (*f
)(tid
, "GPR7" , vex
->guest_GPR7
);
248 (*f
)(tid
, "GPR8" , vex
->guest_GPR8
);
249 (*f
)(tid
, "GPR9" , vex
->guest_GPR9
);
250 (*f
)(tid
, "GPR10", vex
->guest_GPR10
);
251 (*f
)(tid
, "GPR11", vex
->guest_GPR11
);
252 (*f
)(tid
, "GPR12", vex
->guest_GPR12
);
253 (*f
)(tid
, "GPR13", vex
->guest_GPR13
);
254 (*f
)(tid
, "GPR14", vex
->guest_GPR14
);
255 (*f
)(tid
, "GPR15", vex
->guest_GPR15
);
256 (*f
)(tid
, "GPR16", vex
->guest_GPR16
);
257 (*f
)(tid
, "GPR17", vex
->guest_GPR17
);
258 (*f
)(tid
, "GPR18", vex
->guest_GPR18
);
259 (*f
)(tid
, "GPR19", vex
->guest_GPR19
);
260 (*f
)(tid
, "GPR20", vex
->guest_GPR20
);
261 (*f
)(tid
, "GPR21", vex
->guest_GPR21
);
262 (*f
)(tid
, "GPR22", vex
->guest_GPR22
);
263 (*f
)(tid
, "GPR23", vex
->guest_GPR23
);
264 (*f
)(tid
, "GPR24", vex
->guest_GPR24
);
265 (*f
)(tid
, "GPR25", vex
->guest_GPR25
);
266 (*f
)(tid
, "GPR26", vex
->guest_GPR26
);
267 (*f
)(tid
, "GPR27", vex
->guest_GPR27
);
268 (*f
)(tid
, "GPR28", vex
->guest_GPR28
);
269 (*f
)(tid
, "GPR29", vex
->guest_GPR29
);
270 (*f
)(tid
, "GPR30", vex
->guest_GPR30
);
271 (*f
)(tid
, "GPR31", vex
->guest_GPR31
);
272 (*f
)(tid
, "CTR" , vex
->guest_CTR
);
273 (*f
)(tid
, "LR" , vex
->guest_LR
);
274 #elif defined(VGA_arm)
275 (*f
)(tid
, "R0" , vex
->guest_R0
);
276 (*f
)(tid
, "R1" , vex
->guest_R1
);
277 (*f
)(tid
, "R2" , vex
->guest_R2
);
278 (*f
)(tid
, "R3" , vex
->guest_R3
);
279 (*f
)(tid
, "R4" , vex
->guest_R4
);
280 (*f
)(tid
, "R5" , vex
->guest_R5
);
281 (*f
)(tid
, "R6" , vex
->guest_R6
);
282 (*f
)(tid
, "R8" , vex
->guest_R8
);
283 (*f
)(tid
, "R9" , vex
->guest_R9
);
284 (*f
)(tid
, "R10", vex
->guest_R10
);
285 (*f
)(tid
, "R11", vex
->guest_R11
);
286 (*f
)(tid
, "R12", vex
->guest_R12
);
287 (*f
)(tid
, "R13", vex
->guest_R13
);
288 (*f
)(tid
, "R14", vex
->guest_R14
);
289 #elif defined(VGA_s390x)
290 (*f
)(tid
, "r0" , vex
->guest_r0
);
291 (*f
)(tid
, "r1" , vex
->guest_r1
);
292 (*f
)(tid
, "r2" , vex
->guest_r2
);
293 (*f
)(tid
, "r3" , vex
->guest_r3
);
294 (*f
)(tid
, "r4" , vex
->guest_r4
);
295 (*f
)(tid
, "r5" , vex
->guest_r5
);
296 (*f
)(tid
, "r6" , vex
->guest_r6
);
297 (*f
)(tid
, "r7" , vex
->guest_r7
);
298 (*f
)(tid
, "r8" , vex
->guest_r8
);
299 (*f
)(tid
, "r9" , vex
->guest_r9
);
300 (*f
)(tid
, "r10", vex
->guest_r10
);
301 (*f
)(tid
, "r11", vex
->guest_r11
);
302 (*f
)(tid
, "r12", vex
->guest_r12
);
303 (*f
)(tid
, "r13", vex
->guest_r13
);
304 (*f
)(tid
, "r14", vex
->guest_r14
);
305 (*f
)(tid
, "r15", vex
->guest_r15
);
306 #elif defined(VGA_mips32) || defined(VGA_mips64) || defined(VGP_nanomips_linux)
307 (*f
)(tid
, "r0" , vex
->guest_r0
);
308 (*f
)(tid
, "r1" , vex
->guest_r1
);
309 (*f
)(tid
, "r2" , vex
->guest_r2
);
310 (*f
)(tid
, "r3" , vex
->guest_r3
);
311 (*f
)(tid
, "r4" , vex
->guest_r4
);
312 (*f
)(tid
, "r5" , vex
->guest_r5
);
313 (*f
)(tid
, "r6" , vex
->guest_r6
);
314 (*f
)(tid
, "r7" , vex
->guest_r7
);
315 (*f
)(tid
, "r8" , vex
->guest_r8
);
316 (*f
)(tid
, "r9" , vex
->guest_r9
);
317 (*f
)(tid
, "r10", vex
->guest_r10
);
318 (*f
)(tid
, "r11", vex
->guest_r11
);
319 (*f
)(tid
, "r12", vex
->guest_r12
);
320 (*f
)(tid
, "r13", vex
->guest_r13
);
321 (*f
)(tid
, "r14", vex
->guest_r14
);
322 (*f
)(tid
, "r15", vex
->guest_r15
);
323 (*f
)(tid
, "r16", vex
->guest_r16
);
324 (*f
)(tid
, "r17", vex
->guest_r17
);
325 (*f
)(tid
, "r18", vex
->guest_r18
);
326 (*f
)(tid
, "r19", vex
->guest_r19
);
327 (*f
)(tid
, "r20", vex
->guest_r20
);
328 (*f
)(tid
, "r21", vex
->guest_r21
);
329 (*f
)(tid
, "r22", vex
->guest_r22
);
330 (*f
)(tid
, "r23", vex
->guest_r23
);
331 (*f
)(tid
, "r24", vex
->guest_r24
);
332 (*f
)(tid
, "r25", vex
->guest_r25
);
333 (*f
)(tid
, "r26", vex
->guest_r26
);
334 (*f
)(tid
, "r27", vex
->guest_r27
);
335 (*f
)(tid
, "r28", vex
->guest_r28
);
336 (*f
)(tid
, "r29", vex
->guest_r29
);
337 (*f
)(tid
, "r30", vex
->guest_r30
);
338 (*f
)(tid
, "r31", vex
->guest_r31
);
339 #elif defined(VGA_arm64)
340 (*f
)(tid
, "x0" , vex
->guest_X0
);
341 (*f
)(tid
, "x1" , vex
->guest_X1
);
342 (*f
)(tid
, "x2" , vex
->guest_X2
);
343 (*f
)(tid
, "x3" , vex
->guest_X3
);
344 (*f
)(tid
, "x4" , vex
->guest_X4
);
345 (*f
)(tid
, "x5" , vex
->guest_X5
);
346 (*f
)(tid
, "x6" , vex
->guest_X6
);
347 (*f
)(tid
, "x7" , vex
->guest_X7
);
348 (*f
)(tid
, "x8" , vex
->guest_X8
);
349 (*f
)(tid
, "x9" , vex
->guest_X9
);
350 (*f
)(tid
, "x10", vex
->guest_X10
);
351 (*f
)(tid
, "x11", vex
->guest_X11
);
352 (*f
)(tid
, "x12", vex
->guest_X12
);
353 (*f
)(tid
, "x13", vex
->guest_X13
);
354 (*f
)(tid
, "x14", vex
->guest_X14
);
355 (*f
)(tid
, "x15", vex
->guest_X15
);
356 (*f
)(tid
, "x16", vex
->guest_X16
);
357 (*f
)(tid
, "x17", vex
->guest_X17
);
358 (*f
)(tid
, "x18", vex
->guest_X18
);
359 (*f
)(tid
, "x19", vex
->guest_X19
);
360 (*f
)(tid
, "x20", vex
->guest_X20
);
361 (*f
)(tid
, "x21", vex
->guest_X21
);
362 (*f
)(tid
, "x22", vex
->guest_X22
);
363 (*f
)(tid
, "x23", vex
->guest_X23
);
364 (*f
)(tid
, "x24", vex
->guest_X24
);
365 (*f
)(tid
, "x25", vex
->guest_X25
);
366 (*f
)(tid
, "x26", vex
->guest_X26
);
367 (*f
)(tid
, "x27", vex
->guest_X27
);
368 (*f
)(tid
, "x28", vex
->guest_X28
);
369 (*f
)(tid
, "x29", vex
->guest_X29
);
370 (*f
)(tid
, "x30", vex
->guest_X30
);
377 void VG_(apply_to_GP_regs
)(void (*f
)(ThreadId
, const HChar
*, UWord
))
381 for (tid
= 1; tid
< VG_N_THREADS
; tid
++) {
382 if (VG_(is_valid_tid
)(tid
)
383 || VG_(threads
)[tid
].exitreason
== VgSrc_ExitProcess
) {
384 // live thread or thread instructed to die by another thread that
386 apply_to_GPs_of_tid(tid
, f
);
391 void VG_(thread_stack_reset_iter
)(/*OUT*/ThreadId
* tid
)
393 *tid
= (ThreadId
)(-1);
396 Bool
VG_(thread_stack_next
)(/*MOD*/ThreadId
* tid
,
397 /*OUT*/Addr
* stack_min
,
398 /*OUT*/Addr
* stack_max
)
401 for (i
= (*tid
)+1; i
< VG_N_THREADS
; i
++) {
402 if (i
== VG_INVALID_THREADID
)
404 if (VG_(threads
)[i
].status
!= VgTs_Empty
) {
406 *stack_min
= VG_(get_SP
)(i
);
407 *stack_max
= VG_(threads
)[i
].client_stack_highest_byte
;
414 Addr
VG_(thread_get_stack_max
)(ThreadId tid
)
416 vg_assert(0 <= tid
&& tid
< VG_N_THREADS
&& tid
!= VG_INVALID_THREADID
);
417 vg_assert(VG_(threads
)[tid
].status
!= VgTs_Empty
);
418 return VG_(threads
)[tid
].client_stack_highest_byte
;
421 SizeT
VG_(thread_get_stack_size
)(ThreadId tid
)
423 vg_assert(0 <= tid
&& tid
< VG_N_THREADS
&& tid
!= VG_INVALID_THREADID
);
424 vg_assert(VG_(threads
)[tid
].status
!= VgTs_Empty
);
425 return VG_(threads
)[tid
].client_stack_szB
;
428 Addr
VG_(thread_get_altstack_min
)(ThreadId tid
)
430 vg_assert(0 <= tid
&& tid
< VG_N_THREADS
&& tid
!= VG_INVALID_THREADID
);
431 vg_assert(VG_(threads
)[tid
].status
!= VgTs_Empty
);
432 return (Addr
)VG_(threads
)[tid
].altstack
.ss_sp
;
435 SizeT
VG_(thread_get_altstack_size
)(ThreadId tid
)
437 vg_assert(0 <= tid
&& tid
< VG_N_THREADS
&& tid
!= VG_INVALID_THREADID
);
438 vg_assert(VG_(threads
)[tid
].status
!= VgTs_Empty
);
439 return VG_(threads
)[tid
].altstack
.ss_size
;
442 //-------------------------------------------------------------
443 /* Details about the capabilities of the underlying (host) CPU. These
444 details are acquired by (1) enquiring with the CPU at startup, or
445 (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
446 line size). It's a bit nasty in the sense that there's no obvious
447 way to stop uses of some of this info before it's ready to go.
448 See pub_core_machine.h for more information about that.
450 VG_(machine_get_hwcaps) may use signals (although it attempts to
451 leave signal state unchanged) and therefore should only be
452 called before m_main sets up the client's signal state.
455 /* --------- State --------- */
456 static Bool hwcaps_done
= False
;
458 /* --- all archs --- */
459 static VexArch va
= VexArch_INVALID
;
460 static VexArchInfo vai
;
463 UInt
VG_(machine_x86_have_mxcsr
) = 0;
465 #if defined(VGA_ppc32)
466 UInt
VG_(machine_ppc32_has_FP
) = 0;
467 UInt
VG_(machine_ppc32_has_VMX
) = 0;
469 #if defined(VGA_ppc64be) || defined(VGA_ppc64le)
470 ULong
VG_(machine_ppc64_has_VMX
) = 0;
473 Int
VG_(machine_arm_archlevel
) = 4;
477 /* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
478 testing, so we need a VG_MINIMAL_JMP_BUF. */
479 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
480 || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32) \
481 || defined(VGA_mips64) || defined(VGA_arm64)
482 #include "pub_core_libcsetjmp.h"
483 static VG_MINIMAL_JMP_BUF(env_unsup_insn
);
484 static void handler_unsup_insn ( Int x
) {
485 VG_MINIMAL_LONGJMP(env_unsup_insn
);
490 /* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
491 * handlers are installed. Determines the sizes affected by dcbz
492 * and dcbzl instructions and updates the given VexArchInfo structure
495 * Not very defensive: assumes that as long as the dcbz/dcbzl
496 * instructions don't raise a SIGILL, that they will zero an aligned,
497 * contiguous block of memory of a sensible size. */
498 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
499 static void find_ppc_dcbz_sz(VexArchInfo
*arch_info
)
503 # define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
504 char test_block
[4*MAX_DCBZL_SZB
];
505 char *aligned
= test_block
;
508 /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
509 aligned
= (char *)(((HWord
)aligned
+ MAX_DCBZL_SZB
) & ~(MAX_DCBZL_SZB
- 1));
510 vg_assert((aligned
+ MAX_DCBZL_SZB
) <= &test_block
[sizeof(test_block
)]);
512 /* dcbz often clears 32B, although sometimes whatever the native cache
514 VG_(memset
)(test_block
, 0xff, sizeof(test_block
));
515 __asm__
__volatile__("dcbz 0,%0"
517 : "r" (aligned
) /*in*/
518 : "memory" /*clobber*/);
519 for (dcbz_szB
= 0, i
= 0; i
< sizeof(test_block
); ++i
) {
523 vg_assert(dcbz_szB
== 16 || dcbz_szB
== 32 || dcbz_szB
== 64 || dcbz_szB
== 128);
525 /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
526 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
527 dcbzl_szB
= 0; /* indicates unsupported */
530 VG_(memset
)(test_block
, 0xff, sizeof(test_block
));
531 /* some older assemblers won't understand the dcbzl instruction
532 * variant, so we directly emit the instruction ourselves */
533 __asm__
__volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
535 : "r" (aligned
) /*in*/
536 : "memory", "r9" /*clobber*/);
537 for (dcbzl_szB
= 0, i
= 0; i
< sizeof(test_block
); ++i
) {
541 vg_assert(dcbzl_szB
== 16 || dcbzl_szB
== 32 || dcbzl_szB
== 64 || dcbzl_szB
== 128);
544 arch_info
->ppc_dcbz_szB
= dcbz_szB
;
545 arch_info
->ppc_dcbzl_szB
= dcbzl_szB
;
547 VG_(debugLog
)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
548 dcbz_szB
, dcbzl_szB
);
549 # undef MAX_DCBZL_SZB
551 #endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */
555 /* Read /proc/cpuinfo. Look for lines like these
557 processor 0: version = FF, identification = 0117C9, machine = 2064
559 and return the machine model. If the machine model could not be determined
560 or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */
562 static UInt
VG_(get_machine_model
)(void)
564 static struct model_map
{
568 { "2064", VEX_S390X_MODEL_Z900
},
569 { "2066", VEX_S390X_MODEL_Z800
},
570 { "2084", VEX_S390X_MODEL_Z990
},
571 { "2086", VEX_S390X_MODEL_Z890
},
572 { "2094", VEX_S390X_MODEL_Z9_EC
},
573 { "2096", VEX_S390X_MODEL_Z9_BC
},
574 { "2097", VEX_S390X_MODEL_Z10_EC
},
575 { "2098", VEX_S390X_MODEL_Z10_BC
},
576 { "2817", VEX_S390X_MODEL_Z196
},
577 { "2818", VEX_S390X_MODEL_Z114
},
578 { "2827", VEX_S390X_MODEL_ZEC12
},
579 { "2828", VEX_S390X_MODEL_ZBC12
},
580 { "2964", VEX_S390X_MODEL_Z13
},
581 { "2965", VEX_S390X_MODEL_Z13S
},
582 { "3906", VEX_S390X_MODEL_Z14
},
583 { "3907", VEX_S390X_MODEL_Z14_ZR1
},
584 { "8561", VEX_S390X_MODEL_Z15
},
585 { "8562", VEX_S390X_MODEL_Z15
},
590 SizeT num_bytes
, file_buf_size
;
591 HChar
*p
, *m
, *model_name
, *file_buf
;
593 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
594 fd
= VG_(open
)( "/proc/cpuinfo", 0, VKI_S_IRUSR
);
595 if ( sr_isError(fd
) ) return VEX_S390X_MODEL_UNKNOWN
;
599 /* Determine the size of /proc/cpuinfo.
600 Work around broken-ness in /proc file system implementation.
601 fstat returns a zero size for /proc/cpuinfo although it is
602 claimed to be a regular file. */
604 file_buf_size
= 1000;
605 file_buf
= VG_(malloc
)("cpuinfo", file_buf_size
+ 1);
607 n
= VG_(read
)(fh
, file_buf
, file_buf_size
);
611 if (n
< file_buf_size
) break; /* reached EOF */
614 if (n
< 0) num_bytes
= 0; /* read error; ignore contents */
616 if (num_bytes
> file_buf_size
) {
617 VG_(free
)( file_buf
);
618 VG_(lseek
)( fh
, 0, VKI_SEEK_SET
);
619 file_buf
= VG_(malloc
)( "cpuinfo", num_bytes
+ 1 );
620 n
= VG_(read
)( fh
, file_buf
, num_bytes
);
621 if (n
< 0) num_bytes
= 0;
624 file_buf
[num_bytes
] = '\0';
628 model
= VEX_S390X_MODEL_UNKNOWN
;
629 for (p
= file_buf
; *p
; ++p
) {
630 /* Beginning of line */
631 if (VG_(strncmp
)( p
, "processor", sizeof "processor" - 1 ) != 0) continue;
633 m
= VG_(strstr
)( p
, "machine" );
634 if (m
== NULL
) continue;
636 p
= m
+ sizeof "machine" - 1;
637 while ( VG_(isspace
)( *p
) || *p
== '=') {
638 if (*p
== '\n') goto next_line
;
643 for (n
= 0; n
< sizeof model_map
/ sizeof model_map
[0]; ++n
) {
644 struct model_map
*mm
= model_map
+ n
;
645 SizeT len
= VG_(strlen
)( mm
->name
);
646 if ( VG_(strncmp
)( mm
->name
, model_name
, len
) == 0 &&
647 VG_(isspace
)( model_name
[len
] )) {
648 if (mm
->id
< model
) model
= mm
->id
;
649 p
= model_name
+ len
;
653 /* Skip until end-of-line */
659 VG_(free
)( file_buf
);
660 VG_(debugLog
)(1, "machine", "model = %s\n",
661 model
== VEX_S390X_MODEL_UNKNOWN
? "UNKNOWN"
662 : model_map
[model
].name
);
666 #endif /* defined(VGA_s390x) */
668 #if defined(VGA_mips32) || defined(VGA_mips64)
671 * Initialize hwcaps by parsing /proc/cpuinfo . Returns False if it can not
672 * determine what CPU it is (it searches only for the models that are or may be
673 * supported by Valgrind).
675 static Bool
VG_(parse_cpuinfo
)(void)
677 const char *search_Broadcom_str
= "cpu model\t\t: Broadcom";
678 const char *search_Cavium_str
= "cpu model\t\t: Cavium";
679 const char *search_Ingenic_str
= "cpu model\t\t: Ingenic";
680 const char *search_Loongson_str
= "cpu model\t\t: ICT Loongson";
681 const char *search_MIPS_str
= "cpu model\t\t: MIPS";
682 const char *search_Netlogic_str
= "cpu model\t\t: Netlogic";
686 SizeT num_bytes
, file_buf_size
;
687 HChar
*file_buf
, *isa
;
689 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
690 fd
= VG_(open
)( "/proc/cpuinfo", 0, VKI_S_IRUSR
);
691 if ( sr_isError(fd
) ) return False
;
695 /* Determine the size of /proc/cpuinfo.
696 Work around broken-ness in /proc file system implementation.
697 fstat returns a zero size for /proc/cpuinfo although it is
698 claimed to be a regular file. */
700 file_buf_size
= 1000;
701 file_buf
= VG_(malloc
)("cpuinfo", file_buf_size
+ 1);
703 n
= VG_(read
)(fh
, file_buf
, file_buf_size
);
707 if (n
< file_buf_size
) break; /* reached EOF */
710 if (n
< 0) num_bytes
= 0; /* read error; ignore contents */
712 if (num_bytes
> file_buf_size
) {
713 VG_(free
)( file_buf
);
714 VG_(lseek
)( fh
, 0, VKI_SEEK_SET
);
715 file_buf
= VG_(malloc
)( "cpuinfo", num_bytes
+ 1 );
716 n
= VG_(read
)( fh
, file_buf
, num_bytes
);
717 if (n
< 0) num_bytes
= 0;
720 file_buf
[num_bytes
] = '\0';
724 if (VG_(strstr
)(file_buf
, search_Broadcom_str
) != NULL
)
725 vai
.hwcaps
= VEX_PRID_COMP_BROADCOM
;
726 else if (VG_(strstr
)(file_buf
, search_Netlogic_str
) != NULL
)
727 vai
.hwcaps
= VEX_PRID_COMP_NETLOGIC
;
728 else if (VG_(strstr
)(file_buf
, search_Cavium_str
) != NULL
)
729 vai
.hwcaps
= VEX_PRID_COMP_CAVIUM
;
730 else if (VG_(strstr
)(file_buf
, search_MIPS_str
) != NULL
)
731 vai
.hwcaps
= VEX_PRID_COMP_MIPS
;
732 else if (VG_(strstr
)(file_buf
, search_Ingenic_str
) != NULL
)
733 vai
.hwcaps
= VEX_PRID_COMP_INGENIC_E1
;
734 else if (VG_(strstr
)(file_buf
, search_Loongson_str
) != NULL
)
735 vai
.hwcaps
= (VEX_PRID_COMP_LEGACY
| VEX_PRID_IMP_LOONGSON_64
);
737 /* Did not find string in the proc file. */
743 isa
= VG_(strstr
)(file_buf
, "isa\t\t\t: ");
746 if (VG_(strstr
) (isa
, "mips32r1") != NULL
)
747 vai
.hwcaps
|= VEX_MIPS_CPU_ISA_M32R1
;
748 if (VG_(strstr
) (isa
, "mips32r2") != NULL
)
749 vai
.hwcaps
|= VEX_MIPS_CPU_ISA_M32R2
;
750 if (VG_(strstr
) (isa
, "mips32r6") != NULL
)
751 vai
.hwcaps
|= VEX_MIPS_CPU_ISA_M32R6
;
752 if (VG_(strstr
) (isa
, "mips64r1") != NULL
)
753 vai
.hwcaps
|= VEX_MIPS_CPU_ISA_M64R1
;
754 if (VG_(strstr
) (isa
, "mips64r2") != NULL
)
755 vai
.hwcaps
|= VEX_MIPS_CPU_ISA_M64R2
;
756 if (VG_(strstr
) (isa
, "mips64r6") != NULL
)
757 vai
.hwcaps
|= VEX_MIPS_CPU_ISA_M64R6
;
760 * TODO(petarj): Remove this Cavium workaround once Linux kernel folks
761 * decide to change incorrect settings in
762 * mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h.
763 * The current settings show mips32r1, mips32r2 and mips64r1 as
764 * unsupported ISAs by Cavium MIPS CPUs.
766 if (VEX_MIPS_COMP_ID(vai
.hwcaps
) == VEX_PRID_COMP_CAVIUM
) {
767 vai
.hwcaps
|= VEX_MIPS_CPU_ISA_M32R1
| VEX_MIPS_CPU_ISA_M32R2
|
768 VEX_MIPS_CPU_ISA_M64R1
;
772 * Kernel does not provide information about supported ISAs.
773 * Populate the isa level flags based on the CPU model. That is our
776 switch VEX_MIPS_COMP_ID(vai
.hwcaps
) {
777 case VEX_PRID_COMP_CAVIUM
:
778 case VEX_PRID_COMP_NETLOGIC
:
779 vai
.hwcaps
|= (VEX_MIPS_CPU_ISA_M64R2
| VEX_MIPS_CPU_ISA_M64R1
);
781 case VEX_PRID_COMP_INGENIC_E1
:
782 case VEX_PRID_COMP_MIPS
:
783 vai
.hwcaps
|= VEX_MIPS_CPU_ISA_M32R2
;
785 case VEX_PRID_COMP_BROADCOM
:
786 vai
.hwcaps
|= VEX_MIPS_CPU_ISA_M32R1
;
788 case VEX_PRID_COMP_LEGACY
:
789 if ((VEX_MIPS_PROC_ID(vai
.hwcaps
) == VEX_PRID_IMP_LOONGSON_64
))
790 vai
.hwcaps
|= VEX_MIPS_CPU_ISA_M64R2
| VEX_MIPS_CPU_ISA_M64R1
|
791 VEX_MIPS_CPU_ISA_M32R2
| VEX_MIPS_CPU_ISA_M32R1
;
801 #endif /* defined(VGA_mips32) || defined(VGA_mips64) */
803 #if defined(VGP_arm64_linux)
805 /* Check to see whether we are running on a Cavium core, and if so auto-enable
806 the fallback LLSC implementation. See #369459. */
808 static Bool
VG_(parse_cpuinfo
)(void)
810 const char *search_Cavium_str
= "CPU implementer\t: 0x43";
814 SizeT num_bytes
, file_buf_size
;
817 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
818 fd
= VG_(open
)( "/proc/cpuinfo", 0, VKI_S_IRUSR
);
819 if ( sr_isError(fd
) ) return False
;
823 /* Determine the size of /proc/cpuinfo.
824 Work around broken-ness in /proc file system implementation.
825 fstat returns a zero size for /proc/cpuinfo although it is
826 claimed to be a regular file. */
828 file_buf_size
= 1000;
829 file_buf
= VG_(malloc
)("cpuinfo", file_buf_size
+ 1);
831 n
= VG_(read
)(fh
, file_buf
, file_buf_size
);
835 if (n
< file_buf_size
) break; /* reached EOF */
838 if (n
< 0) num_bytes
= 0; /* read error; ignore contents */
840 if (num_bytes
> file_buf_size
) {
841 VG_(free
)( file_buf
);
842 VG_(lseek
)( fh
, 0, VKI_SEEK_SET
);
843 file_buf
= VG_(malloc
)( "cpuinfo", num_bytes
+ 1 );
844 n
= VG_(read
)( fh
, file_buf
, num_bytes
);
845 if (n
< 0) num_bytes
= 0;
848 file_buf
[num_bytes
] = '\0';
852 if (VG_(strstr
)(file_buf
, search_Cavium_str
) != NULL
)
853 vai
.arm64_requires_fallback_LLSC
= True
;
859 #endif /* defined(VGP_arm64_linux) */
861 Bool
VG_(machine_get_hwcaps
)( void )
863 vg_assert(hwcaps_done
== False
);
866 // Whack default settings into vai, so that we only need to fill in
867 // any interesting bits.
868 LibVEX_default_VexArchInfo(&vai
);
871 { Bool have_sse1
, have_sse2
, have_sse3
, have_cx8
, have_lzcnt
, have_mmxext
;
872 UInt eax
, ebx
, ecx
, edx
, max_extended
;
876 if (!VG_(has_cpuid
)())
877 /* we can't do cpuid at all. Give up. */
880 VG_(cpuid
)(0, 0, &eax
, &ebx
, &ecx
, &edx
);
882 /* we can't ask for cpuid(x) for x > 0. Give up. */
885 /* Get processor ID string, and max basic/extended index
887 VG_(memcpy
)(&vstr
[0], &ebx
, 4);
888 VG_(memcpy
)(&vstr
[4], &edx
, 4);
889 VG_(memcpy
)(&vstr
[8], &ecx
, 4);
892 VG_(cpuid
)(0x80000000, 0, &eax
, &ebx
, &ecx
, &edx
);
895 /* get capabilities bits into edx */
896 VG_(cpuid
)(1, 0, &eax
, &ebx
, &ecx
, &edx
);
898 have_sse1
= (edx
& (1<<25)) != 0; /* True => have sse insns */
899 have_sse2
= (edx
& (1<<26)) != 0; /* True => have sse2 insns */
900 have_sse3
= (ecx
& (1<<0)) != 0; /* True => have sse3 insns */
902 /* cmpxchg8b is a minimum requirement now; if we don't have it we
903 must simply give up. But all CPUs since Pentium-I have it, so
904 that doesn't seem like much of a restriction. */
905 have_cx8
= (edx
& (1<<8)) != 0; /* True => have cmpxchg8b */
909 /* Figure out if this is an AMD that can do MMXEXT. */
911 if (0 == VG_(strcmp
)(vstr
, "AuthenticAMD")
912 && max_extended
>= 0x80000001) {
913 VG_(cpuid
)(0x80000001, 0, &eax
, &ebx
, &ecx
, &edx
);
914 /* Some older AMD processors support a sse1 subset (Integer SSE). */
915 have_mmxext
= !have_sse1
&& ((edx
& (1<<22)) != 0);
918 /* Figure out if this is an AMD or Intel that can do LZCNT. */
920 if ((0 == VG_(strcmp
)(vstr
, "AuthenticAMD")
921 || 0 == VG_(strcmp
)(vstr
, "GenuineIntel"))
922 && max_extended
>= 0x80000001) {
923 VG_(cpuid
)(0x80000001, 0, &eax
, &ebx
, &ecx
, &edx
);
924 have_lzcnt
= (ecx
& (1<<5)) != 0; /* True => have LZCNT */
927 /* Intel processors don't define the mmxext extension, but since it
928 is just a sse1 subset always define it when we have sse1. */
933 vai
.endness
= VexEndnessLE
;
935 if (have_sse3
&& have_sse2
&& have_sse1
&& have_mmxext
) {
936 vai
.hwcaps
= VEX_HWCAPS_X86_MMXEXT
;
937 vai
.hwcaps
|= VEX_HWCAPS_X86_SSE1
;
938 vai
.hwcaps
|= VEX_HWCAPS_X86_SSE2
;
939 vai
.hwcaps
|= VEX_HWCAPS_X86_SSE3
;
941 vai
.hwcaps
|= VEX_HWCAPS_X86_LZCNT
;
942 VG_(machine_x86_have_mxcsr
) = 1;
943 } else if (have_sse2
&& have_sse1
&& have_mmxext
) {
944 vai
.hwcaps
= VEX_HWCAPS_X86_MMXEXT
;
945 vai
.hwcaps
|= VEX_HWCAPS_X86_SSE1
;
946 vai
.hwcaps
|= VEX_HWCAPS_X86_SSE2
;
948 vai
.hwcaps
|= VEX_HWCAPS_X86_LZCNT
;
949 VG_(machine_x86_have_mxcsr
) = 1;
950 } else if (have_sse1
&& have_mmxext
) {
951 vai
.hwcaps
= VEX_HWCAPS_X86_MMXEXT
;
952 vai
.hwcaps
|= VEX_HWCAPS_X86_SSE1
;
953 VG_(machine_x86_have_mxcsr
) = 1;
954 } else if (have_mmxext
) {
955 vai
.hwcaps
= VEX_HWCAPS_X86_MMXEXT
; /*integer only sse1 subset*/
956 VG_(machine_x86_have_mxcsr
) = 0;
958 vai
.hwcaps
= 0; /*baseline - no sse at all*/
959 VG_(machine_x86_have_mxcsr
) = 0;
962 VG_(machine_get_cache_info
)(&vai
);
967 #elif defined(VGA_amd64)
968 { Bool have_sse3
, have_ssse3
, have_cx8
, have_cx16
;
969 Bool have_lzcnt
, have_avx
, have_bmi
, have_avx2
;
970 Bool have_rdtscp
, have_rdrand
, have_f16c
, have_rdseed
;
971 UInt eax
, ebx
, ecx
, edx
, max_basic
, max_extended
;
976 have_sse3
= have_ssse3
= have_cx8
= have_cx16
977 = have_lzcnt
= have_avx
= have_bmi
= have_avx2
978 = have_rdtscp
= have_rdrand
= have_f16c
= have_rdseed
= False
;
980 eax
= ebx
= ecx
= edx
= max_basic
= max_extended
= 0;
982 if (!VG_(has_cpuid
)())
983 /* we can't do cpuid at all. Give up. */
986 VG_(cpuid
)(0, 0, &eax
, &ebx
, &ecx
, &edx
);
989 /* we can't ask for cpuid(x) for x > 0. Give up. */
992 /* Get processor ID string, and max basic/extended index
994 VG_(memcpy
)(&vstr
[0], &ebx
, 4);
995 VG_(memcpy
)(&vstr
[4], &edx
, 4);
996 VG_(memcpy
)(&vstr
[8], &ecx
, 4);
999 VG_(cpuid
)(0x80000000, 0, &eax
, &ebx
, &ecx
, &edx
);
1002 /* get capabilities bits into edx */
1003 VG_(cpuid
)(1, 0, &eax
, &ebx
, &ecx
, &edx
);
1005 // we assume that SSE1 and SSE2 are available by default
1006 have_sse3
= (ecx
& (1<<0)) != 0; /* True => have sse3 insns */
1007 have_ssse3
= (ecx
& (1<<9)) != 0; /* True => have Sup SSE3 insns */
1012 // osxsave is ecx:27
1014 have_f16c
= (ecx
& (1<<29)) != 0; /* True => have F16C insns */
1015 have_rdrand
= (ecx
& (1<<30)) != 0; /* True => have RDRAND insns */
1018 /* have_fma = False; */
1019 if ( (ecx
& ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
1020 /* Processor supports AVX instructions and XGETBV is enabled
1021 by OS and AVX instructions are enabled by the OS. */
1023 __asm__
__volatile__("movq $0,%%rcx ; "
1024 ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
1026 :/*OUT*/"=r"(w
) :/*IN*/
1027 :/*TRASH*/"rdx","rcx","rax");
1029 if ((xgetbv_0
& 7) == 7) {
1030 /* Only say we have AVX if the XSAVE-allowable
1031 bitfield-mask allows x87, SSE and AVX state. We could
1032 actually run with a more restrictive XGETBV(0) value,
1033 but VEX's implementation of XSAVE and XRSTOR assumes
1034 that all 3 bits are enabled.
1036 Also, the VEX implementation of XSAVE/XRSTOR assumes that
1037 state component [2] (the YMM high halves) are located in
1038 the XSAVE image at offsets 576 .. 831. So we have to
1039 check that here before declaring AVX to be supported. */
1040 UInt eax2
, ebx2
, ecx2
, edx2
;
1041 VG_(cpuid
)(0xD, 2, &eax2
, &ebx2
, &ecx2
, &edx2
);
1042 if (ebx2
== 576 && eax2
== 256) {
1045 /* have_fma = (ecx & (1<<12)) != 0; */
1046 /* have_fma: Probably correct, but gcc complains due to
1051 /* cmpxchg8b is a minimum requirement now; if we don't have it we
1052 must simply give up. But all CPUs since Pentium-I have it, so
1053 that doesn't seem like much of a restriction. */
1054 have_cx8
= (edx
& (1<<8)) != 0; /* True => have cmpxchg8b */
1058 /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
1059 have_cx16
= (ecx
& (1<<13)) != 0; /* True => have cmpxchg16b */
1061 /* Figure out if this CPU can do LZCNT. */
1063 if (max_extended
>= 0x80000001) {
1064 VG_(cpuid
)(0x80000001, 0, &eax
, &ebx
, &ecx
, &edx
);
1065 have_lzcnt
= (ecx
& (1<<5)) != 0; /* True => have LZCNT */
1068 /* Can we do RDTSCP? */
1069 have_rdtscp
= False
;
1070 if (max_extended
>= 0x80000001) {
1071 VG_(cpuid
)(0x80000001, 0, &eax
, &ebx
, &ecx
, &edx
);
1072 have_rdtscp
= (edx
& (1<<27)) != 0; /* True => have RDTSVCP */
1075 /* Check for BMI1 and AVX2. If we have AVX1 (plus OS support). */
1078 if (have_avx
&& max_basic
>= 7) {
1079 VG_(cpuid
)(7, 0, &eax
, &ebx
, &ecx
, &edx
);
1080 have_bmi
= (ebx
& (1<<3)) != 0; /* True => have BMI1 */
1081 have_avx2
= (ebx
& (1<<5)) != 0; /* True => have AVX2 */
1082 have_rdseed
= (ebx
& (1<<18)) != 0; /* True => have RDSEED insns */
1085 /* Sanity check for RDRAND and F16C. These don't actually *need* AVX, but
1086 it's convenient to restrict them to the AVX case since the simulated
1087 CPUID we'll offer them on has AVX as a base. */
1090 have_rdrand
= False
;
1091 have_rdseed
= False
;
1095 vai
.endness
= VexEndnessLE
;
1096 vai
.hwcaps
= (have_sse3
? VEX_HWCAPS_AMD64_SSE3
: 0)
1097 | (have_ssse3
? VEX_HWCAPS_AMD64_SSSE3
: 0)
1098 | (have_cx16
? VEX_HWCAPS_AMD64_CX16
: 0)
1099 | (have_lzcnt
? VEX_HWCAPS_AMD64_LZCNT
: 0)
1100 | (have_avx
? VEX_HWCAPS_AMD64_AVX
: 0)
1101 | (have_bmi
? VEX_HWCAPS_AMD64_BMI
: 0)
1102 | (have_avx2
? VEX_HWCAPS_AMD64_AVX2
: 0)
1103 | (have_rdtscp
? VEX_HWCAPS_AMD64_RDTSCP
: 0)
1104 | (have_f16c
? VEX_HWCAPS_AMD64_F16C
: 0)
1105 | (have_rdrand
? VEX_HWCAPS_AMD64_RDRAND
: 0)
1106 | (have_rdseed
? VEX_HWCAPS_AMD64_RDSEED
: 0);
1108 VG_(machine_get_cache_info
)(&vai
);
1113 #elif defined(VGA_ppc32)
1115 /* Find out which subset of the ppc32 instruction set is supported by
1116 verifying whether various ppc32 instructions generate a SIGILL
1117 or a SIGFPE. An alternative approach is to check the AT_HWCAP and
1118 AT_PLATFORM entries in the ELF auxiliary table -- see also
1119 the_iifii.client_auxv in m_main.c.
1121 vki_sigset_t saved_set
, tmp_set
;
1122 vki_sigaction_fromK_t saved_sigill_act
, saved_sigfpe_act
;
1123 vki_sigaction_toK_t tmp_sigill_act
, tmp_sigfpe_act
;
1125 volatile Bool have_F
, have_V
, have_FX
, have_GX
, have_VX
, have_DFP
;
1126 volatile Bool have_isa_2_07
, have_isa_3_0
;
1129 /* This is a kludge. Really we ought to back-convert saved_act
1130 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1131 since that's a no-op on all ppc32 platforms so far supported,
1132 it's not worth the typing effort. At least include most basic
1134 vg_assert(sizeof(vki_sigaction_fromK_t
) == sizeof(vki_sigaction_toK_t
));
1136 VG_(sigemptyset
)(&tmp_set
);
1137 VG_(sigaddset
)(&tmp_set
, VKI_SIGILL
);
1138 VG_(sigaddset
)(&tmp_set
, VKI_SIGFPE
);
1140 r
= VG_(sigprocmask
)(VKI_SIG_UNBLOCK
, &tmp_set
, &saved_set
);
1143 r
= VG_(sigaction
)(VKI_SIGILL
, NULL
, &saved_sigill_act
);
1145 tmp_sigill_act
= saved_sigill_act
;
1147 r
= VG_(sigaction
)(VKI_SIGFPE
, NULL
, &saved_sigfpe_act
);
1149 tmp_sigfpe_act
= saved_sigfpe_act
;
1151 /* NODEFER: signal handler does not return (from the kernel's point of
1152 view), hence if it is to successfully catch a signal more than once,
1153 we need the NODEFER flag. */
1154 tmp_sigill_act
.sa_flags
&= ~VKI_SA_RESETHAND
;
1155 tmp_sigill_act
.sa_flags
&= ~VKI_SA_SIGINFO
;
1156 tmp_sigill_act
.sa_flags
|= VKI_SA_NODEFER
;
1157 tmp_sigill_act
.ksa_handler
= handler_unsup_insn
;
1158 r
= VG_(sigaction
)(VKI_SIGILL
, &tmp_sigill_act
, NULL
);
1161 tmp_sigfpe_act
.sa_flags
&= ~VKI_SA_RESETHAND
;
1162 tmp_sigfpe_act
.sa_flags
&= ~VKI_SA_SIGINFO
;
1163 tmp_sigfpe_act
.sa_flags
|= VKI_SA_NODEFER
;
1164 tmp_sigfpe_act
.ksa_handler
= handler_unsup_insn
;
1165 r
= VG_(sigaction
)(VKI_SIGFPE
, &tmp_sigfpe_act
, NULL
);
1168 /* standard FP insns */
1170 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1173 __asm__
__volatile__(".long 0xFC000090"); /*fmr 0,0 */
1178 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1181 /* Unfortunately some older assemblers don't speak Altivec (or
1182 choose not to), so to be safe we directly emit the 32-bit
1183 word corresponding to "vor 0,0,0". This fixes a build
1184 problem that happens on Debian 3.1 (ppc32), and probably
1185 various other places. */
1186 __asm__
__volatile__(".long 0x10000484"); /*vor 0,0,0*/
1189 /* General-Purpose optional (fsqrt, fsqrts) */
1191 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1194 __asm__
__volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
1197 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
1199 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1202 __asm__
__volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
1205 /* VSX support implies Power ISA 2.06 */
1207 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1210 __asm__
__volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
1213 /* Check for Decimal Floating Point (DFP) support. */
1215 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1218 __asm__
__volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
1221 /* Check for ISA 2.07 support. */
1222 have_isa_2_07
= True
;
1223 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1224 have_isa_2_07
= False
;
1226 __asm__
__volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
1229 /* Check for ISA 3.0 support. */
1230 have_isa_3_0
= True
;
1231 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1232 have_isa_3_0
= False
;
1234 __asm__
__volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
1237 // ISA 3.1 not supported on 32-bit systems
1239 /* determine dcbz/dcbzl sizes while we still have the signal
1240 * handlers registered */
1241 find_ppc_dcbz_sz(&vai
);
1243 r
= VG_(sigaction
)(VKI_SIGILL
, &saved_sigill_act
, NULL
);
1245 r
= VG_(sigaction
)(VKI_SIGFPE
, &saved_sigfpe_act
, NULL
);
1247 r
= VG_(sigprocmask
)(VKI_SIG_SETMASK
, &saved_set
, NULL
);
1249 VG_(debugLog
)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
1250 (Int
)have_F
, (Int
)have_V
, (Int
)have_FX
,
1251 (Int
)have_GX
, (Int
)have_VX
, (Int
)have_DFP
,
1252 (Int
)have_isa_2_07
, (Int
)have_isa_3_0
);
1253 /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
1254 if (have_V
&& !have_F
)
1256 if (have_FX
&& !have_F
)
1258 if (have_GX
&& !have_F
)
1261 VG_(machine_ppc32_has_FP
) = have_F
? 1 : 0;
1262 VG_(machine_ppc32_has_VMX
) = have_V
? 1 : 0;
1265 vai
.endness
= VexEndnessBE
;
1268 if (have_F
) vai
.hwcaps
|= VEX_HWCAPS_PPC32_F
;
1269 if (have_V
) vai
.hwcaps
|= VEX_HWCAPS_PPC32_V
;
1270 if (have_FX
) vai
.hwcaps
|= VEX_HWCAPS_PPC32_FX
;
1271 if (have_GX
) vai
.hwcaps
|= VEX_HWCAPS_PPC32_GX
;
1272 if (have_VX
) vai
.hwcaps
|= VEX_HWCAPS_PPC32_VX
;
1273 if (have_DFP
) vai
.hwcaps
|= VEX_HWCAPS_PPC32_DFP
;
1274 if (have_isa_2_07
) vai
.hwcaps
|= VEX_HWCAPS_PPC32_ISA2_07
;
1275 if (have_isa_3_0
) vai
.hwcaps
|= VEX_HWCAPS_PPC32_ISA3_0
;
1276 /* ISA 3.1 not supported on 32-bit systems. */
1278 VG_(machine_get_cache_info
)(&vai
);
1280 /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
1281 called before we're ready to go. */
1285 #elif defined(VGA_ppc64be)|| defined(VGA_ppc64le)
1287 /* Same instruction set detection algorithm as for ppc32. */
1288 vki_sigset_t saved_set
, tmp_set
;
1289 vki_sigaction_fromK_t saved_sigill_act
, saved_sigfpe_act
;
1290 vki_sigaction_toK_t tmp_sigill_act
, tmp_sigfpe_act
;
1292 volatile Bool have_F
, have_V
, have_FX
, have_GX
, have_VX
, have_DFP
;
1293 volatile Bool have_isa_2_07
, have_isa_3_0
, have_isa_3_1
;
1296 /* This is a kludge. Really we ought to back-convert saved_act
1297 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1298 since that's a no-op on all ppc64 platforms so far supported,
1299 it's not worth the typing effort. At least include most basic
1301 vg_assert(sizeof(vki_sigaction_fromK_t
) == sizeof(vki_sigaction_toK_t
));
1303 VG_(sigemptyset
)(&tmp_set
);
1304 VG_(sigaddset
)(&tmp_set
, VKI_SIGILL
);
1305 VG_(sigaddset
)(&tmp_set
, VKI_SIGFPE
);
1307 r
= VG_(sigprocmask
)(VKI_SIG_UNBLOCK
, &tmp_set
, &saved_set
);
1310 r
= VG_(sigaction
)(VKI_SIGILL
, NULL
, &saved_sigill_act
);
1312 tmp_sigill_act
= saved_sigill_act
;
1314 VG_(sigaction
)(VKI_SIGFPE
, NULL
, &saved_sigfpe_act
);
1315 tmp_sigfpe_act
= saved_sigfpe_act
;
1317 /* NODEFER: signal handler does not return (from the kernel's point of
1318 view), hence if it is to successfully catch a signal more than once,
1319 we need the NODEFER flag. */
1320 tmp_sigill_act
.sa_flags
&= ~VKI_SA_RESETHAND
;
1321 tmp_sigill_act
.sa_flags
&= ~VKI_SA_SIGINFO
;
1322 tmp_sigill_act
.sa_flags
|= VKI_SA_NODEFER
;
1323 tmp_sigill_act
.ksa_handler
= handler_unsup_insn
;
1324 VG_(sigaction
)(VKI_SIGILL
, &tmp_sigill_act
, NULL
);
1326 tmp_sigfpe_act
.sa_flags
&= ~VKI_SA_RESETHAND
;
1327 tmp_sigfpe_act
.sa_flags
&= ~VKI_SA_SIGINFO
;
1328 tmp_sigfpe_act
.sa_flags
|= VKI_SA_NODEFER
;
1329 tmp_sigfpe_act
.ksa_handler
= handler_unsup_insn
;
1330 VG_(sigaction
)(VKI_SIGFPE
, &tmp_sigfpe_act
, NULL
);
1332 /* standard FP insns */
1334 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1337 __asm__
__volatile__("fmr 0,0");
1342 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1345 __asm__
__volatile__(".long 0x10000484"); /*vor 0,0,0*/
1348 /* General-Purpose optional (fsqrt, fsqrts) */
1350 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1353 __asm__
__volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
1356 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
1358 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1361 __asm__
__volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
1364 /* VSX support implies Power ISA 2.06 */
1366 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1369 __asm__
__volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
1372 /* Check for Decimal Floating Point (DFP) support. */
1374 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1377 __asm__
__volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
1380 /* Check for ISA 2.07 support. */
1381 have_isa_2_07
= True
;
1382 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1383 have_isa_2_07
= False
;
1385 __asm__
__volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
1388 /* Check for ISA 3.0 support. */
1389 have_isa_3_0
= True
;
1390 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1391 have_isa_3_0
= False
;
1393 __asm__
__volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
1396 /* Check for ISA 3.1 support. */
1397 have_isa_3_1
= True
;
1398 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1399 have_isa_3_1
= False
;
1401 __asm__
__volatile__(".long 0x7f1401b6"); /* brh RA, RS */
1404 /* determine dcbz/dcbzl sizes while we still have the signal
1405 * handlers registered */
1406 find_ppc_dcbz_sz(&vai
);
1408 VG_(sigaction
)(VKI_SIGILL
, &saved_sigill_act
, NULL
);
1409 VG_(sigaction
)(VKI_SIGFPE
, &saved_sigfpe_act
, NULL
);
1410 VG_(sigprocmask
)(VKI_SIG_SETMASK
, &saved_set
, NULL
);
1411 VG_(debugLog
)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d ISA3.1 %d\n",
1412 (Int
)have_F
, (Int
)have_V
, (Int
)have_FX
,
1413 (Int
)have_GX
, (Int
)have_VX
, (Int
)have_DFP
,
1414 (Int
)have_isa_2_07
, (int)have_isa_3_0
, (int)have_isa_3_1
);
1415 /* on ppc64be, if we don't even have FP, just give up. */
1419 VG_(machine_ppc64_has_VMX
) = have_V
? 1 : 0;
1422 # if defined(VKI_LITTLE_ENDIAN)
1423 vai
.endness
= VexEndnessLE
;
1424 # elif defined(VKI_BIG_ENDIAN)
1425 vai
.endness
= VexEndnessBE
;
1427 vai
.endness
= VexEndness_INVALID
;
1431 if (have_V
) vai
.hwcaps
|= VEX_HWCAPS_PPC64_V
;
1432 if (have_FX
) vai
.hwcaps
|= VEX_HWCAPS_PPC64_FX
;
1433 if (have_GX
) vai
.hwcaps
|= VEX_HWCAPS_PPC64_GX
;
1434 if (have_VX
) vai
.hwcaps
|= VEX_HWCAPS_PPC64_VX
;
1435 if (have_DFP
) vai
.hwcaps
|= VEX_HWCAPS_PPC64_DFP
;
1436 if (have_isa_2_07
) vai
.hwcaps
|= VEX_HWCAPS_PPC64_ISA2_07
;
1437 if (have_isa_3_0
) vai
.hwcaps
|= VEX_HWCAPS_PPC64_ISA3_0
;
1438 if (have_isa_3_1
) vai
.hwcaps
|= VEX_HWCAPS_PPC64_ISA3_1
;
1440 VG_(machine_get_cache_info
)(&vai
);
1442 /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
1443 called before we're ready to go. */
1447 #elif defined(VGA_s390x)
1449 # include "libvex_s390x_common.h"
1452 /* Instruction set detection code borrowed from ppc above. */
1453 vki_sigset_t saved_set
, tmp_set
;
1454 vki_sigaction_fromK_t saved_sigill_act
;
1455 vki_sigaction_toK_t tmp_sigill_act
;
1457 volatile Bool have_LDISP
, have_STFLE
;
1460 /* If the model is "unknown" don't treat this as an error. Assume
1461 this is a brand-new machine model for which we don't have the
1462 identification yet. Keeping fingers crossed. */
1463 model
= VG_(get_machine_model
)();
1465 /* Unblock SIGILL and stash away the old action for that signal */
1466 VG_(sigemptyset
)(&tmp_set
);
1467 VG_(sigaddset
)(&tmp_set
, VKI_SIGILL
);
1469 r
= VG_(sigprocmask
)(VKI_SIG_UNBLOCK
, &tmp_set
, &saved_set
);
1472 r
= VG_(sigaction
)(VKI_SIGILL
, NULL
, &saved_sigill_act
);
1474 tmp_sigill_act
= saved_sigill_act
;
1476 /* NODEFER: signal handler does not return (from the kernel's point of
1477 view), hence if it is to successfully catch a signal more than once,
1478 we need the NODEFER flag. */
1479 tmp_sigill_act
.sa_flags
&= ~VKI_SA_RESETHAND
;
1480 tmp_sigill_act
.sa_flags
&= ~VKI_SA_SIGINFO
;
1481 tmp_sigill_act
.sa_flags
|= VKI_SA_NODEFER
;
1482 tmp_sigill_act
.ksa_handler
= handler_unsup_insn
;
1483 VG_(sigaction
)(VKI_SIGILL
, &tmp_sigill_act
, NULL
);
1485 /* Determine hwcaps. Note, we cannot use the stfle insn because it
1486 is not supported on z900. */
1489 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1492 /* BASR loads the address of the next insn into r1. Needed to avoid
1493 a segfault in XY. */
1494 __asm__
__volatile__("basr %%r1,%%r0\n\t"
1495 ".long 0xe3001000\n\t" /* XY 0,0(%r1) */
1496 ".short 0x0057" : : : "r0", "r1", "cc", "memory");
1499 /* Check availability of STFLE. If available store facility bits
1501 ULong hoststfle
[S390_NUM_FACILITY_DW
];
1503 for (i
= 0; i
< S390_NUM_FACILITY_DW
; ++i
)
1507 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1510 register ULong reg0
asm("0") = S390_NUM_FACILITY_DW
- 1;
1512 __asm__
__volatile__(" .insn s,0xb2b00000,%0\n" /* stfle */
1513 : "=m" (hoststfle
), "+d"(reg0
)
1514 : : "cc", "memory");
1517 /* Restore signals */
1518 r
= VG_(sigaction
)(VKI_SIGILL
, &saved_sigill_act
, NULL
);
1520 r
= VG_(sigprocmask
)(VKI_SIG_SETMASK
, &saved_set
, NULL
);
1523 vai
.endness
= VexEndnessBE
;
1526 if (have_STFLE
) vai
.hwcaps
|= VEX_HWCAPS_S390X_STFLE
;
1528 /* Use long displacement only on machines >= z990. For all other
1529 machines it is millicoded and therefore slow. */
1530 if (model
>= VEX_S390X_MODEL_Z990
)
1531 vai
.hwcaps
|= VEX_HWCAPS_S390X_LDISP
;
1534 /* Detect presence of certain facilities using the STFLE insn.
1535 Note, that these facilities were introduced at the same time or later
1536 as STFLE, so the absence of STLFE implies the absence of the facility
1537 we're trying to detect. */
1538 struct fac_hwcaps_map
{
1542 const HChar name
[6]; // may need adjustment for new facility names
1544 { False
, S390_FAC_EIMM
, VEX_HWCAPS_S390X_EIMM
, "EIMM" },
1545 { False
, S390_FAC_GIE
, VEX_HWCAPS_S390X_GIE
, "GIE" },
1546 { False
, S390_FAC_DFP
, VEX_HWCAPS_S390X_DFP
, "DFP" },
1547 { False
, S390_FAC_FPSE
, VEX_HWCAPS_S390X_FGX
, "FGX" },
1548 { False
, S390_FAC_ETF2
, VEX_HWCAPS_S390X_ETF2
, "ETF2" },
1549 { False
, S390_FAC_ETF3
, VEX_HWCAPS_S390X_ETF3
, "ETF3" },
1550 { False
, S390_FAC_STCKF
, VEX_HWCAPS_S390X_STCKF
, "STCKF" },
1551 { False
, S390_FAC_FPEXT
, VEX_HWCAPS_S390X_FPEXT
, "FPEXT" },
1552 { False
, S390_FAC_LSC
, VEX_HWCAPS_S390X_LSC
, "LSC" },
1553 { False
, S390_FAC_PFPO
, VEX_HWCAPS_S390X_PFPO
, "PFPO" },
1554 { False
, S390_FAC_VX
, VEX_HWCAPS_S390X_VX
, "VX" },
1555 { False
, S390_FAC_MSA5
, VEX_HWCAPS_S390X_MSA5
, "MSA5" },
1556 { False
, S390_FAC_MI2
, VEX_HWCAPS_S390X_MI2
, "MI2" },
1557 { False
, S390_FAC_LSC2
, VEX_HWCAPS_S390X_LSC2
, "LSC2" },
1558 { False
, S390_FAC_VXE
, VEX_HWCAPS_S390X_VXE
, "VXE" },
1561 /* Set hwcaps according to the detected facilities */
1562 UChar dw_number
= 0;
1564 for (i
=0; i
< sizeof fac_hwcaps
/ sizeof fac_hwcaps
[0]; ++i
) {
1565 vg_assert(fac_hwcaps
[i
].facility_bit
<= 191); // for now
1566 dw_number
= fac_hwcaps
[i
].facility_bit
/ 64;
1567 fac_bit
= fac_hwcaps
[i
].facility_bit
% 64;
1568 if (hoststfle
[dw_number
] & (1ULL << (63 - fac_bit
))) {
1569 fac_hwcaps
[i
].installed
= True
;
1570 vai
.hwcaps
|= fac_hwcaps
[i
].hwcaps_bit
;
1574 /* Build up a string showing the probed-for facilities */
1575 HChar fac_str
[(sizeof fac_hwcaps
/ sizeof fac_hwcaps
[0]) *
1576 (sizeof fac_hwcaps
[0].name
+ 3) + // %s %d
1577 7 + 1 + 4 + 2 // machine %4d
1580 p
+= VG_(sprintf
)(p
, "machine %4d ", model
);
1581 for (i
=0; i
< sizeof fac_hwcaps
/ sizeof fac_hwcaps
[0]; ++i
) {
1582 p
+= VG_(sprintf
)(p
, " %s %1u", fac_hwcaps
[i
].name
,
1583 fac_hwcaps
[i
].installed
);
1587 VG_(debugLog
)(1, "machine", "%s\n", fac_str
);
1588 VG_(debugLog
)(1, "machine", "hwcaps = 0x%x\n", vai
.hwcaps
);
1590 VG_(machine_get_cache_info
)(&vai
);
1595 #elif defined(VGA_arm)
1597 /* Same instruction set detection algorithm as for ppc32. */
1598 vki_sigset_t saved_set
, tmp_set
;
1599 vki_sigaction_fromK_t saved_sigill_act
, saved_sigfpe_act
;
1600 vki_sigaction_toK_t tmp_sigill_act
, tmp_sigfpe_act
;
1602 volatile Bool have_VFP
, have_VFP2
, have_VFP3
, have_NEON
, have_V8
;
1603 volatile Int archlevel
;
1606 /* This is a kludge. Really we ought to back-convert saved_act
1607 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1608 since that's a no-op on all ppc64 platforms so far supported,
1609 it's not worth the typing effort. At least include most basic
1611 vg_assert(sizeof(vki_sigaction_fromK_t
) == sizeof(vki_sigaction_toK_t
));
1613 VG_(sigemptyset
)(&tmp_set
);
1614 VG_(sigaddset
)(&tmp_set
, VKI_SIGILL
);
1615 VG_(sigaddset
)(&tmp_set
, VKI_SIGFPE
);
1617 r
= VG_(sigprocmask
)(VKI_SIG_UNBLOCK
, &tmp_set
, &saved_set
);
1620 r
= VG_(sigaction
)(VKI_SIGILL
, NULL
, &saved_sigill_act
);
1622 tmp_sigill_act
= saved_sigill_act
;
1624 VG_(sigaction
)(VKI_SIGFPE
, NULL
, &saved_sigfpe_act
);
1625 tmp_sigfpe_act
= saved_sigfpe_act
;
1627 /* NODEFER: signal handler does not return (from the kernel's point of
1628 view), hence if it is to successfully catch a signal more than once,
1629 we need the NODEFER flag. */
1630 tmp_sigill_act
.sa_flags
&= ~VKI_SA_RESETHAND
;
1631 tmp_sigill_act
.sa_flags
&= ~VKI_SA_SIGINFO
;
1632 tmp_sigill_act
.sa_flags
|= VKI_SA_NODEFER
;
1633 tmp_sigill_act
.ksa_handler
= handler_unsup_insn
;
1634 VG_(sigaction
)(VKI_SIGILL
, &tmp_sigill_act
, NULL
);
1636 tmp_sigfpe_act
.sa_flags
&= ~VKI_SA_RESETHAND
;
1637 tmp_sigfpe_act
.sa_flags
&= ~VKI_SA_SIGINFO
;
1638 tmp_sigfpe_act
.sa_flags
|= VKI_SA_NODEFER
;
1639 tmp_sigfpe_act
.ksa_handler
= handler_unsup_insn
;
1640 VG_(sigaction
)(VKI_SIGFPE
, &tmp_sigfpe_act
, NULL
);
1644 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1647 __asm__
__volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
1649 /* There are several generation of VFP extension but they differs very
1650 little so for now we will not distinguish them. */
1651 have_VFP2
= have_VFP
;
1652 have_VFP3
= have_VFP
;
1656 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1659 __asm__
__volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
1662 /* ARM architecture level */
1663 archlevel
= 5; /* v5 will be base level */
1664 if (archlevel
< 7) {
1666 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1669 __asm__
__volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
1672 if (archlevel
< 6) {
1674 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1677 __asm__
__volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
1683 if (archlevel
== 7) {
1684 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
1687 __asm__
__volatile__(".word 0xF3044F54"); /* VMAXNM.F32 q2,q2,q2 */
1689 if (have_V8
&& have_NEON
&& have_VFP3
) {
1694 VG_(convert_sigaction_fromK_to_toK
)(&saved_sigill_act
, &tmp_sigill_act
);
1695 VG_(convert_sigaction_fromK_to_toK
)(&saved_sigfpe_act
, &tmp_sigfpe_act
);
1696 VG_(sigaction
)(VKI_SIGILL
, &tmp_sigill_act
, NULL
);
1697 VG_(sigaction
)(VKI_SIGFPE
, &tmp_sigfpe_act
, NULL
);
1698 VG_(sigprocmask
)(VKI_SIG_SETMASK
, &saved_set
, NULL
);
1700 VG_(debugLog
)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
1701 archlevel
, (Int
)have_VFP
, (Int
)have_VFP2
, (Int
)have_VFP3
,
1704 VG_(machine_arm_archlevel
) = archlevel
;
1707 vai
.endness
= VexEndnessLE
;
1709 vai
.hwcaps
= VEX_ARM_ARCHLEVEL(archlevel
);
1710 if (have_VFP3
) vai
.hwcaps
|= VEX_HWCAPS_ARM_VFP3
;
1711 if (have_VFP2
) vai
.hwcaps
|= VEX_HWCAPS_ARM_VFP2
;
1712 if (have_VFP
) vai
.hwcaps
|= VEX_HWCAPS_ARM_VFP
;
1713 if (have_NEON
) vai
.hwcaps
|= VEX_HWCAPS_ARM_NEON
;
1715 VG_(machine_get_cache_info
)(&vai
);
1720 #elif defined(VGA_arm64)
1722 /* Use the attribute and feature registers to determine host hardware
1723 * capabilities. Only user-space features are read. Naming conventions
1724 * follow the Arm Architecture Reference Manual.
1726 * ID_AA64ISAR0_EL1 Instruction Set Attribute Register 0
1728 * ...5544 4444 4444 3333 3333 3332 2222 2222 1111 1111 11
1729 * ...1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
1730 * FHM DP SM4 SM3 SHA3 RDM ATOMICS
1732 * ID_AA64ISAR1_EL1 Instruction Set Attribute Register 1
1734 * ...5555 5544 4444 4444 3333 3333 3332 2222 2222 1111 1111 11
1735 * ...5432 1098 7654 3210 9876 5432 1098 7654 3210 9876 5432 1098 7654 3210
1738 * ID_AA64PFR0_EL1 Processor Feature Register 0
1740 * 6666...2222 2222 1111 1111 11
1741 * 3210...7654 3210 9876 5432 1098 7654 3210
1745 Bool is_base_v8
= False
;
1747 Bool have_fhm
, have_dp
, have_sm4
, have_sm3
, have_sha3
, have_rdm
;
1748 Bool have_atomics
, have_i8mm
, have_bf16
, have_dpbcvap
, have_dpbcvadp
;
1749 Bool have_vfp16
, have_fp16
;
1751 have_fhm
= have_dp
= have_sm4
= have_sm3
= have_sha3
= have_rdm
1752 = have_atomics
= have_i8mm
= have_bf16
= have_dpbcvap
1753 = have_dpbcvadp
= have_vfp16
= have_fp16
= False
;
1755 /* Some baseline v8.0 kernels do not allow reads of these registers. Use
1756 * the same SIGILL handling algorithm as other architectures for such
1759 vki_sigset_t saved_set
, tmp_set
;
1760 vki_sigaction_fromK_t saved_sigill_act
;
1761 vki_sigaction_toK_t tmp_sigill_act
;
1763 vg_assert(sizeof(vki_sigaction_fromK_t
) == sizeof(vki_sigaction_toK_t
));
1765 VG_(sigemptyset
)(&tmp_set
);
1766 VG_(sigaddset
)(&tmp_set
, VKI_SIGILL
);
1770 r
= VG_(sigprocmask
)(VKI_SIG_UNBLOCK
, &tmp_set
, &saved_set
);
1773 r
= VG_(sigaction
)(VKI_SIGILL
, NULL
, &saved_sigill_act
);
1775 tmp_sigill_act
= saved_sigill_act
;
1777 /* NODEFER: signal handler does not return (from the kernel's point of
1778 view), hence if it is to successfully catch a signal more than once,
1779 we need the NODEFER flag. */
1780 tmp_sigill_act
.sa_flags
&= ~VKI_SA_RESETHAND
;
1781 tmp_sigill_act
.sa_flags
&= ~VKI_SA_SIGINFO
;
1782 tmp_sigill_act
.sa_flags
|= VKI_SA_NODEFER
;
1783 tmp_sigill_act
.ksa_handler
= handler_unsup_insn
;
1784 VG_(sigaction
)(VKI_SIGILL
, &tmp_sigill_act
, NULL
);
1786 /* Does reading ID_AA64ISAR0_EL1 register throw SIGILL on base v8.0? */
1787 if (VG_MINIMAL_SETJMP(env_unsup_insn
))
1790 __asm__
__volatile__("mrs x0, ID_AA64ISAR0_EL1");
1792 VG_(convert_sigaction_fromK_to_toK
)(&saved_sigill_act
, &tmp_sigill_act
);
1793 VG_(sigaction
)(VKI_SIGILL
, &tmp_sigill_act
, NULL
);
1794 VG_(sigprocmask
)(VKI_SIG_SETMASK
, &saved_set
, NULL
);
1797 vai
.endness
= VexEndnessLE
;
1799 /* Baseline features are v8.0. */
1802 VG_(machine_get_cache_info
)(&vai
);
1804 /* Check whether we need to use the fallback LLSC implementation.
1805 If the check fails, give up. */
1806 if (! VG_(parse_cpuinfo
)())
1809 /* 0 denotes 'not set'. The range of legitimate values here,
1810 after being set that is, is 2 though 17 inclusive. */
1811 vg_assert(vai
.arm64_dMinLine_lg2_szB
== 0);
1812 vg_assert(vai
.arm64_iMinLine_lg2_szB
== 0);
1814 __asm__
__volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0
));
1815 vai
.arm64_dMinLine_lg2_szB
= ((ctr_el0
>> 16) & 0xF) + 2;
1816 vai
.arm64_iMinLine_lg2_szB
= ((ctr_el0
>> 0) & 0xF) + 2;
1817 VG_(debugLog
)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
1818 "ctr_el0.iMinLine_szB = %d\n",
1819 1 << vai
.arm64_dMinLine_lg2_szB
,
1820 1 << vai
.arm64_iMinLine_lg2_szB
);
1821 VG_(debugLog
)(1, "machine", "ARM64: requires_fallback_LLSC: %s\n",
1822 vai
.arm64_requires_fallback_LLSC
? "yes" : "no");
1827 /* ID_AA64ISAR0_EL1 Instruction set attribute register 0 fields */
1828 #define ID_AA64ISAR0_FHM_SHIFT 48
1829 #define ID_AA64ISAR0_DP_SHIFT 44
1830 #define ID_AA64ISAR0_SM4_SHIFT 40
1831 #define ID_AA64ISAR0_SM3_SHIFT 36
1832 #define ID_AA64ISAR0_SHA3_SHIFT 32
1833 #define ID_AA64ISAR0_RDM_SHIFT 28
1834 #define ID_AA64ISAR0_ATOMICS_SHIFT 20
1836 #define ID_AA64ISAR0_FHM_SUPPORTED 0x1
1837 #define ID_AA64ISAR0_DP_SUPPORTED 0x1
1838 #define ID_AA64ISAR0_SM4_SUPPORTED 0x1
1839 #define ID_AA64ISAR0_SM3_SUPPORTED 0x1
1840 #define ID_AA64ISAR0_SHA3_SUPPORTED 0x1
1841 #define ID_AA64ISAR0_RDM_SUPPORTED 0x1
1842 #define ID_AA64ISAR0_ATOMICS_SUPPORTED 0x2
1844 /* ID_AA64ISAR1_EL1 Instruction set attribute register 1 fields */
1845 #define ID_AA64ISAR1_I8MM_SHIFT 52
1846 #define ID_AA64ISAR1_BF16_SHIFT 44
1847 #define ID_AA64ISAR1_DPB_SHIFT 0
1849 #define ID_AA64ISAR1_I8MM_SUPPORTED 0x1
1850 #define ID_AA64ISAR1_BF16_SUPPORTED 0x1
1851 #define ID_AA64ISAR1_DPBCVAP_SUPPORTED 0x1
1852 #define ID_AA64ISAR1_DPBCVADP_SUPPORTED 0x2
1854 /* ID_AA64PFR0_EL1 Processor feature register 0 fields */
1855 #define ID_AA64PFR0_VFP16_SHIFT 20
1856 #define ID_AA64PFR0_FP16_SHIFT 16
1858 #define ID_AA64PFR0_VFP16_SUPPORTED 0x1
1859 #define ID_AA64PFR0_FP16_SUPPORTED 0x1
1861 #define get_cpu_ftr(id) ({ \
1862 unsigned long val; \
1863 asm("mrs %0, "#id : "=r" (val)); \
1864 VG_(debugLog)(1, "machine", "ARM64: %-20s: 0x%016lx\n", #id, val); \
1866 get_cpu_ftr(ID_AA64ISAR0_EL1
);
1867 get_cpu_ftr(ID_AA64ISAR1_EL1
);
1868 get_cpu_ftr(ID_AA64PFR0_EL1
);
1870 #define get_ftr(id, ftr, fval, have_ftr) ({ \
1871 unsigned long rval; \
1872 asm("mrs %0, "#id : "=r" (rval)); \
1873 have_ftr = (fval & ((rval >> ftr) & 0xf)) >= fval ? True : False; \
1876 /* Read ID_AA64ISAR0_EL1 attributes */
1878 /* FHM indicates support for FMLAL and FMLSL instructions.
1879 * Optional for v8.2.
1881 get_ftr(ID_AA64ISAR0_EL1
, ID_AA64ISAR0_FHM_SHIFT
,
1882 ID_AA64ISAR0_FHM_SUPPORTED
, have_fhm
);
1884 /* DP indicates support for UDOT and SDOT instructions.
1885 * Optional for v8.2.
1887 get_ftr(ID_AA64ISAR0_EL1
, ID_AA64ISAR0_DP_SHIFT
,
1888 ID_AA64ISAR0_DP_SUPPORTED
, have_dp
);
1890 /* SM4 indicates support for SM4E and SM4EKEY instructions.
1891 * Optional for v8.2.
1893 get_ftr(ID_AA64ISAR0_EL1
, ID_AA64ISAR0_SM4_SHIFT
,
1894 ID_AA64ISAR0_SM4_SUPPORTED
, have_sm4
);
1896 /* SM3 indicates support for SM3SS1, SM3TT1A, SM3TT1B, SM3TT2A, * SM3TT2B,
1897 * SM3PARTW1, and SM3PARTW2 instructions.
1898 * Optional for v8.2.
1900 get_ftr(ID_AA64ISAR0_EL1
, ID_AA64ISAR0_SM3_SHIFT
,
1901 ID_AA64ISAR0_SM3_SUPPORTED
, have_sm3
);
1903 /* SHA3 indicates support for EOR3, RAX1, XAR, and BCAX instructions.
1904 * Optional for v8.2.
1906 get_ftr(ID_AA64ISAR0_EL1
, ID_AA64ISAR0_SHA3_SHIFT
,
1907 ID_AA64ISAR0_SHA3_SUPPORTED
, have_sha3
);
1909 /* RDM indicates support for SQRDMLAH and SQRDMLSH instructions.
1910 * Mandatory from v8.1 onwards.
1912 get_ftr(ID_AA64ISAR0_EL1
, ID_AA64ISAR0_RDM_SHIFT
,
1913 ID_AA64ISAR0_RDM_SUPPORTED
, have_rdm
);
1915 /* v8.1 ATOMICS indicates support for LDADD, LDCLR, LDEOR, LDSET, LDSMAX,
1916 * LDSMIN, LDUMAX, LDUMIN, CAS, CASP, and SWP instructions.
1917 * Mandatory from v8.1 onwards.
1919 get_ftr(ID_AA64ISAR0_EL1
, ID_AA64ISAR0_ATOMICS_SHIFT
,
1920 ID_AA64ISAR0_ATOMICS_SUPPORTED
, have_atomics
);
1922 /* Read ID_AA64ISAR1_EL1 attributes */
1924 /* I8MM indicates support for SMMLA, SUDOT, UMMLA, USMMLA, and USDOT
1926 * Optional for v8.2.
1928 get_ftr(ID_AA64ISAR1_EL1
, ID_AA64ISAR1_I8MM_SHIFT
,
1929 ID_AA64ISAR1_I8MM_SUPPORTED
, have_i8mm
);
1931 /* BF16 indicates support for BFDOT, BFMLAL, BFMLAL2, BFMMLA, BFCVT, and
1932 * BFCVT2 instructions.
1933 * Optional for v8.2.
1935 get_ftr(ID_AA64ISAR1_EL1
, ID_AA64ISAR1_BF16_SHIFT
,
1936 ID_AA64ISAR1_BF16_SUPPORTED
, have_bf16
);
1938 /* DPB indicates support for DC CVAP instruction.
1939 * Mandatory for v8.2 onwards.
1941 get_ftr(ID_AA64ISAR1_EL1
, ID_AA64ISAR1_DPB_SHIFT
,
1942 ID_AA64ISAR1_DPBCVAP_SUPPORTED
, have_dpbcvap
);
1944 /* DPB indicates support for DC CVADP instruction.
1945 * Optional for v8.2.
1947 get_ftr(ID_AA64ISAR1_EL1
, ID_AA64ISAR1_DPB_SHIFT
,
1948 ID_AA64ISAR1_DPBCVADP_SUPPORTED
, have_dpbcvadp
);
1950 /* Read ID_AA64PFR0_EL1 attributes */
1952 /* VFP16 indicates support for half-precision vector arithmetic.
1953 * Optional for v8.2. Must be the same value as FP16.
1955 get_ftr(ID_AA64PFR0_EL1
, ID_AA64PFR0_VFP16_SHIFT
,
1956 ID_AA64PFR0_VFP16_SUPPORTED
, have_vfp16
);
1958 /* FP16 indicates support for half-precision scalar arithmetic.
1959 * Optional for v8.2. Must be the same value as VFP16.
1961 get_ftr(ID_AA64PFR0_EL1
, ID_AA64PFR0_FP16_SHIFT
,
1962 ID_AA64PFR0_FP16_SUPPORTED
, have_fp16
);
1964 if (have_fhm
) vai
.hwcaps
|= VEX_HWCAPS_ARM64_FHM
;
1965 if (have_dpbcvap
) vai
.hwcaps
|= VEX_HWCAPS_ARM64_DPBCVAP
;
1966 if (have_dpbcvadp
) vai
.hwcaps
|= VEX_HWCAPS_ARM64_DPBCVADP
;
1967 if (have_sm3
) vai
.hwcaps
|= VEX_HWCAPS_ARM64_SM3
;
1968 if (have_sm4
) vai
.hwcaps
|= VEX_HWCAPS_ARM64_SM4
;
1969 if (have_sha3
) vai
.hwcaps
|= VEX_HWCAPS_ARM64_SHA3
;
1970 if (have_rdm
) vai
.hwcaps
|= VEX_HWCAPS_ARM64_RDM
;
1971 if (have_i8mm
) vai
.hwcaps
|= VEX_HWCAPS_ARM64_I8MM
;
1972 if (have_atomics
) vai
.hwcaps
|= VEX_HWCAPS_ARM64_ATOMICS
;
1973 if (have_bf16
) vai
.hwcaps
|= VEX_HWCAPS_ARM64_BF16
;
1974 if (have_fp16
) vai
.hwcaps
|= VEX_HWCAPS_ARM64_FP16
;
1975 if (have_vfp16
) vai
.hwcaps
|= VEX_HWCAPS_ARM64_VFP16
;
1983 #elif defined(VGA_mips32)
1985 /* Define the position of F64 bit in FIR register. */
1988 if (!VG_(parse_cpuinfo
)())
1991 # if defined(VKI_LITTLE_ENDIAN)
1992 vai
.endness
= VexEndnessLE
;
1993 # elif defined(VKI_BIG_ENDIAN)
1994 vai
.endness
= VexEndnessBE
;
1996 vai
.endness
= VexEndness_INVALID
;
1999 /* Same instruction set detection algorithm as for ppc32/arm... */
2000 vki_sigset_t saved_set
, tmp_set
;
2001 vki_sigaction_fromK_t saved_sigill_act
;
2002 vki_sigaction_toK_t tmp_sigill_act
;
2004 volatile Bool have_DSP
, have_DSPr2
, have_MSA
;
2007 vg_assert(sizeof(vki_sigaction_fromK_t
) == sizeof(vki_sigaction_toK_t
));
2009 VG_(sigemptyset
)(&tmp_set
);
2010 VG_(sigaddset
)(&tmp_set
, VKI_SIGILL
);
2012 r
= VG_(sigprocmask
)(VKI_SIG_UNBLOCK
, &tmp_set
, &saved_set
);
2015 r
= VG_(sigaction
)(VKI_SIGILL
, NULL
, &saved_sigill_act
);
2017 tmp_sigill_act
= saved_sigill_act
;
2019 /* NODEFER: signal handler does not return (from the kernel's point of
2020 view), hence if it is to successfully catch a signal more than once,
2021 we need the NODEFER flag. */
2022 tmp_sigill_act
.sa_flags
&= ~VKI_SA_RESETHAND
;
2023 tmp_sigill_act
.sa_flags
&= ~VKI_SA_SIGINFO
;
2024 tmp_sigill_act
.sa_flags
|= VKI_SA_NODEFER
;
2025 tmp_sigill_act
.ksa_handler
= handler_unsup_insn
;
2026 VG_(sigaction
)(VKI_SIGILL
, &tmp_sigill_act
, NULL
);
2028 if (VEX_PRID_COMP_MIPS
== VEX_MIPS_COMP_ID(vai
.hwcaps
)) {
2030 /* MSA instructions. */
2032 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
2035 __asm__
__volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
2038 vai
.hwcaps
|= VEX_PRID_IMP_P5600
;
2040 /* DSPr2 instructions. */
2042 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
2045 __asm__
__volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
2048 /* We assume it's 74K, since it can run DSPr2. */
2049 vai
.hwcaps
|= VEX_PRID_IMP_74K
;
2051 /* DSP instructions. */
2053 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
2056 __asm__
__volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
2059 /* We assume it's 34K, since it has support for DSP. */
2060 vai
.hwcaps
|= VEX_PRID_IMP_34K
;
2066 # if defined(VGP_mips32_linux)
2067 Int fpmode
= VG_(prctl
)(VKI_PR_GET_FP_MODE
, 0, 0, 0, 0);
2073 /* prctl(PR_GET_FP_MODE) is not supported by Kernel,
2074 we are using alternative way to determine FP mode */
2077 if (!VG_MINIMAL_SETJMP(env_unsup_insn
)) {
2080 ".set noreorder\n\t"
2082 ".set hardfloat\n\t"
2083 "lui $t0, 0x3FF0\n\t"
2090 : "t0", "$f0", "$f1", "memory");
2092 fpmode
= (result
!= 0x3FF0000000000000ull
);
2097 vai
.hwcaps
|= VEX_MIPS_HOST_FR
;
2099 VG_(convert_sigaction_fromK_to_toK
)(&saved_sigill_act
, &tmp_sigill_act
);
2100 VG_(sigaction
)(VKI_SIGILL
, &tmp_sigill_act
, NULL
);
2101 VG_(sigprocmask
)(VKI_SIG_SETMASK
, &saved_set
, NULL
);
2103 VG_(debugLog
)(1, "machine", "hwcaps = 0x%x\n", vai
.hwcaps
);
2104 VG_(machine_get_cache_info
)(&vai
);
2109 #elif defined(VGA_mips64)
2112 if (!VG_(parse_cpuinfo
)())
2115 # if defined(VKI_LITTLE_ENDIAN)
2116 vai
.endness
= VexEndnessLE
;
2117 # elif defined(VKI_BIG_ENDIAN)
2118 vai
.endness
= VexEndnessBE
;
2120 vai
.endness
= VexEndness_INVALID
;
2123 vai
.hwcaps
|= VEX_MIPS_HOST_FR
;
2125 /* Same instruction set detection algorithm as for ppc32/arm... */
2126 vki_sigset_t saved_set
, tmp_set
;
2127 vki_sigaction_fromK_t saved_sigill_act
;
2128 vki_sigaction_toK_t tmp_sigill_act
;
2130 volatile Bool have_MSA
;
2133 vg_assert(sizeof(vki_sigaction_fromK_t
) == sizeof(vki_sigaction_toK_t
));
2135 VG_(sigemptyset
)(&tmp_set
);
2136 VG_(sigaddset
)(&tmp_set
, VKI_SIGILL
);
2138 r
= VG_(sigprocmask
)(VKI_SIG_UNBLOCK
, &tmp_set
, &saved_set
);
2141 r
= VG_(sigaction
)(VKI_SIGILL
, NULL
, &saved_sigill_act
);
2143 tmp_sigill_act
= saved_sigill_act
;
2145 /* NODEFER: signal handler does not return (from the kernel's point of
2146 view), hence if it is to successfully catch a signal more than once,
2147 we need the NODEFER flag. */
2148 tmp_sigill_act
.sa_flags
&= ~VKI_SA_RESETHAND
;
2149 tmp_sigill_act
.sa_flags
&= ~VKI_SA_SIGINFO
;
2150 tmp_sigill_act
.sa_flags
|= VKI_SA_NODEFER
;
2151 tmp_sigill_act
.ksa_handler
= handler_unsup_insn
;
2152 VG_(sigaction
)(VKI_SIGILL
, &tmp_sigill_act
, NULL
);
2154 if (VEX_PRID_COMP_MIPS
== VEX_MIPS_COMP_ID(vai
.hwcaps
)) {
2156 /* MSA instructions */
2158 if (VG_MINIMAL_SETJMP(env_unsup_insn
)) {
2161 __asm__
__volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
2164 vai
.hwcaps
|= VEX_PRID_IMP_P5600
;
2168 VG_(convert_sigaction_fromK_to_toK
)(&saved_sigill_act
, &tmp_sigill_act
);
2169 VG_(sigaction
)(VKI_SIGILL
, &tmp_sigill_act
, NULL
);
2170 VG_(sigprocmask
)(VKI_SIG_SETMASK
, &saved_set
, NULL
);
2172 VG_(debugLog
)(1, "machine", "hwcaps = 0x%x\n", vai
.hwcaps
);
2174 VG_(machine_get_cache_info
)(&vai
);
2179 #elif defined(VGP_nanomips_linux)
2181 va
= VexArchNANOMIPS
;
2184 # if defined(VKI_LITTLE_ENDIAN)
2185 vai
.endness
= VexEndnessLE
;
2186 # elif defined(VKI_BIG_ENDIAN)
2187 vai
.endness
= VexEndnessBE
;
2189 vai
.endness
= VexEndness_INVALID
;
2192 VG_(debugLog
)(1, "machine", "hwcaps = 0x%x\n", vai
.hwcaps
);
2194 VG_(machine_get_cache_info
)(&vai
);
2199 # error "Unknown arch"
/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc32)
/* Record the host's icache line size in vai, for use when generating
   cache-management code (e.g. for self-modifying-code support).
   Must be called after hwcaps detection has completed. */
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   /* Only the plausible power-of-two line sizes are accepted. */
   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif
/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc64be)|| defined(VGA_ppc64le)
/* Record the host's icache line size in vai (ppc64 variant of the
   ppc32 hook above).  Must be called after hwcaps detection. */
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   /* Only the plausible power-of-two line sizes are accepted. */
   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif
/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
/* Set or clear the NEON capability bit in vai.hwcaps, as determined
   by the caller.  Must be called after hwcaps detection. */
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif
2255 /* Fetch host cpu info, once established. */
2256 void VG_(machine_get_VexArchInfo
)( /*OUT*/VexArch
* pVa
,
2257 /*OUT*/VexArchInfo
* pVai
)
2259 vg_assert(hwcaps_done
);
2261 if (pVai
) *pVai
= vai
;
2265 /* Returns the size of the largest guest register that we will
2266 simulate in this run. This depends on both the guest architecture
2267 and on the specific capabilities we are simulating for that guest
2268 (eg, AVX or non-AVX ?, for amd64). Should return either 4, 8, 16
2269 or 32. General rule: if in doubt, return a value larger than
2272 This information is needed by Cachegrind and Callgrind to decide
2273 what the minimum cache line size they are prepared to simulate is.
2274 Basically require that the minimum cache line size is at least as
2275 large as the largest register that might get transferred to/from
2276 memory, so as to guarantee that any such transaction can straddle
2277 at most 2 cache lines.
2279 Int
VG_(machine_get_size_of_largest_guest_register
) ( void )
2281 vg_assert(hwcaps_done
);
2282 /* Once hwcaps_done is True, we can fish around inside va/vai to
2283 find the information we need. */
2285 # if defined(VGA_x86)
2286 vg_assert(va
== VexArchX86
);
2287 /* We don't support AVX, so 32 is out. At the other end, even if
2288 we don't support any SSE, the X87 can generate 10 byte
2289 transfers, so let's say 16 to be on the safe side. Hence the
2290 answer is always 16. */
2293 # elif defined(VGA_amd64)
2294 /* if AVX then 32 else 16 */
2295 return (vai
.hwcaps
& VEX_HWCAPS_AMD64_AVX
) ? 32 : 16;
2297 # elif defined(VGA_ppc32)
2298 /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
2299 if (vai
.hwcaps
& VEX_HWCAPS_PPC32_V
) return 16;
2300 if (vai
.hwcaps
& VEX_HWCAPS_PPC32_VX
) return 16;
2301 if (vai
.hwcaps
& VEX_HWCAPS_PPC32_DFP
) return 16;
2304 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
2305 /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
2306 if (vai
.hwcaps
& VEX_HWCAPS_PPC64_V
) return 16;
2307 if (vai
.hwcaps
& VEX_HWCAPS_PPC64_VX
) return 16;
2308 if (vai
.hwcaps
& VEX_HWCAPS_PPC64_DFP
) return 16;
2311 # elif defined(VGA_s390x)
2314 # elif defined(VGA_arm)
2315 /* Really it depends whether or not we have NEON, but let's just
2316 assume we always do. */
2319 # elif defined(VGA_arm64)
2320 /* ARM64 always has Neon, AFAICS. */
2323 # elif defined(VGA_mips32) || defined(VGP_nanomips_linux)
2324 /* The guest state implies 4, but that can't really be true, can
2328 # elif defined(VGA_mips64)
2332 # error "Unknown arch"
2337 // Given a pointer to a function as obtained by "& functionname" in C,
2338 // produce a pointer to the actual entry point for the function.
2339 void* VG_(fnptr_to_fnentry
)( void* f
)
2341 # if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \
2342 || defined(VGP_arm_linux) || defined(VGO_darwin) \
2343 || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
2344 || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
2345 || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
2346 || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris) \
2347 || defined(VGP_nanomips_linux)
2349 # elif defined(VGP_ppc64be_linux)
2350 /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
2351 3-word function descriptor, of which the first word is the entry
2353 UWord
* descr
= (UWord
*)f
;
2354 return (void*)(descr
[0]);
2356 # error "Unknown platform"
/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/