/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2017 Julian Seward

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/
#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcproc.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

#define STACK_PTR_S1(regs) ((regs).vex_shadow1.VG_STACK_PTR)
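
/* Orientation note (added; not in the original sources): VG_INSTR_PTR,
   VG_STACK_PTR and VG_FRAME_PTR name the arch-specific guest register
   fields, so on amd64, for example, INSTR_PTR(VG_(threads)[tid].arch)
   reads VG_(threads)[tid].arch.vex.guest_RIP. */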
Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

Addr VG_(get_SP_s1) ( ThreadId tid ) {
   return STACK_PTR_S1( VG_(threads)[tid].arch );
}
void VG_(set_SP_s1) ( ThreadId tid, Addr sp ) {
   STACK_PTR_S1( VG_(threads)[tid].arch ) = sp;
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}

void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}
void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_FP;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
   /* ANDREAS 3 Apr 2019 FIXME r_f0..r_f7: is this correct? */
   regs->misc.S390X.r_f0
      = VG_(threads)[tid].arch.vex.guest_v0.w64[0];
   regs->misc.S390X.r_f1
      = VG_(threads)[tid].arch.vex.guest_v1.w64[0];
   regs->misc.S390X.r_f2
      = VG_(threads)[tid].arch.vex.guest_v2.w64[0];
   regs->misc.S390X.r_f3
      = VG_(threads)[tid].arch.vex.guest_v3.w64[0];
   regs->misc.S390X.r_f4
      = VG_(threads)[tid].arch.vex.guest_v4.w64[0];
   regs->misc.S390X.r_f5
      = VG_(threads)[tid].arch.vex.guest_v5.w64[0];
   regs->misc.S390X.r_f6
      = VG_(threads)[tid].arch.vex.guest_v6.w64[0];
   regs->misc.S390X.r_f7
      = VG_(threads)[tid].arch.vex.guest_v7.w64[0];
#  elif defined(VGA_mips32) || defined(VGP_nanomips_linux)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}
void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(src != NULL);
   VG_(memcpy)( dst, src, size );
}
void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(dst != NULL);
   VG_(memcpy)( dst, src, size );
}
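
/* Usage sketch (added for illustration; not part of the original file):
   a tool that wants its own copy of shadow area 1 for a thread's whole
   guest state could call the getter like this -- "shadow1_copy" is a
   hypothetical buffer name:

      UChar shadow1_copy[sizeof(VexGuestArchState)];
      VG_(get_shadow_regs_area)( tid, shadow1_copy,
                                 1, 0, sizeof(VexGuestArchState) );

   Writing bytes back is the mirror call, VG_(set_shadow_regs_area), with
   a shadowNo/offset/size triple that satisfies the same bounds asserts. */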
static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, UWord))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32) || defined(VGA_mips64) || defined(VGP_nanomips_linux)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#elif defined(VGA_arm64)
   (*f)(tid, "x0" , vex->guest_X0 );
   (*f)(tid, "x1" , vex->guest_X1 );
   (*f)(tid, "x2" , vex->guest_X2 );
   (*f)(tid, "x3" , vex->guest_X3 );
   (*f)(tid, "x4" , vex->guest_X4 );
   (*f)(tid, "x5" , vex->guest_X5 );
   (*f)(tid, "x6" , vex->guest_X6 );
   (*f)(tid, "x7" , vex->guest_X7 );
   (*f)(tid, "x8" , vex->guest_X8 );
   (*f)(tid, "x9" , vex->guest_X9 );
   (*f)(tid, "x10", vex->guest_X10);
   (*f)(tid, "x11", vex->guest_X11);
   (*f)(tid, "x12", vex->guest_X12);
   (*f)(tid, "x13", vex->guest_X13);
   (*f)(tid, "x14", vex->guest_X14);
   (*f)(tid, "x15", vex->guest_X15);
   (*f)(tid, "x16", vex->guest_X16);
   (*f)(tid, "x17", vex->guest_X17);
   (*f)(tid, "x18", vex->guest_X18);
   (*f)(tid, "x19", vex->guest_X19);
   (*f)(tid, "x20", vex->guest_X20);
   (*f)(tid, "x21", vex->guest_X21);
   (*f)(tid, "x22", vex->guest_X22);
   (*f)(tid, "x23", vex->guest_X23);
   (*f)(tid, "x24", vex->guest_X24);
   (*f)(tid, "x25", vex->guest_X25);
   (*f)(tid, "x26", vex->guest_X26);
   (*f)(tid, "x27", vex->guest_X27);
   (*f)(tid, "x28", vex->guest_X28);
   (*f)(tid, "x29", vex->guest_X29);
   (*f)(tid, "x30", vex->guest_X30);
#else
#  error Unknown arch
#endif
}
void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread or thread instructed to die by another thread that
         // called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}
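
/* Usage sketch (added for illustration; not part of the original file):
   the callback receives one (name, value) pair per guest general-purpose
   register of every live thread.  A hypothetical callback that merely
   prints them could look like

      static void print_gp_reg ( ThreadId tid, const HChar* name, UWord val )
      {
         VG_(printf)("tid %u: %s = 0x%lx\n", tid, name, val);
      }

   and would be invoked as VG_(apply_to_GP_regs)(print_gp_reg). */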
void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_byte;
         return True;
      }
   }
   return False;
}
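
/* Usage sketch (added for illustration; not part of the original file):
   the intended iteration pattern over all client stacks is

      ThreadId tid;
      Addr stack_min, stack_max;
      VG_(thread_stack_reset_iter)(&tid);
      while ( VG_(thread_stack_next)(&tid, &stack_min, &stack_max) ) {
         // stack_min is the thread's current SP, stack_max its highest
         // client stack byte
      }
*/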
Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_byte;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}
//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif


/* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
   testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32) \
    || defined(VGA_mips64)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
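
/* Note (added for orientation, not in the original sources): every probe
   below uses the same setjmp/SIGILL pattern with the handler installed
   above.  For a feature X the shape is:

      have_X = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_X = False;                      // probe insn raised SIGILL
      } else {
         __asm__ __volatile__(".long 0x...."); // emit the probe insn
      }
*/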
/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, that they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB  = 0;
   Int dcbzl_szB = 0;
#  define MAX_DCBZL_SZB (128)   /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */
#if defined(VGA_s390x)

/* Read /proc/cpuinfo. Look for lines like these

      processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      const HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
      { "2828", VEX_S390X_MODEL_ZBC12 },
      { "2964", VEX_S390X_MODEL_Z13 },
      { "2965", VEX_S390X_MODEL_Z13S },
      { "3906", VEX_S390X_MODEL_Z14 },
      { "3907", VEX_S390X_MODEL_Z14_ZR1 },
      { "8561", VEX_S390X_MODEL_Z15 },
      { "8562", VEX_S390X_MODEL_Z15 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
      if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

      m = VG_(strstr)( p, "machine" );
      if (m == NULL) continue;

      p = m + sizeof "machine" - 1;
      while ( VG_(isspace)( *p ) || *p == '=') {
         if (*p == '\n') goto next_line;
         ++p;
      }

      model_name = p;
      for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
         struct model_map *mm = model_map + n;
         SizeT len = VG_(strlen)( mm->name );
         if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
              VG_(isspace)( model_name[len] )) {
            if (mm->id < model) model = mm->id;
            p = model_name + len;
            break;
         }
      }
      /* Skip until end-of-line */
      while (*p != '\n')
         ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* defined(VGA_s390x) */
#if defined(VGA_mips32) || defined(VGA_mips64)

/*
 * Initialize hwcaps by parsing /proc/cpuinfo . Returns False if it can not
 * determine what CPU it is (it searches only for the models that are or may be
 * supported by Valgrind).
 */
static Bool VG_(parse_cpuinfo)(void)
{
   const char *search_Broadcom_str = "cpu model\t\t: Broadcom";
   const char *search_Cavium_str   = "cpu model\t\t: Cavium";
   const char *search_Ingenic_str  = "cpu model\t\t: Ingenic";
   const char *search_Loongson_str = "cpu model\t\t: ICT Loongson";
   const char *search_MIPS_str     = "cpu model\t\t: MIPS";
   const char *search_Netlogic_str = "cpu model\t\t: Netlogic";

   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf, *isa;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return False;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr)(file_buf, search_Broadcom_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_BROADCOM;
   else if (VG_(strstr)(file_buf, search_Netlogic_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_NETLOGIC;
   else if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_CAVIUM;
   else if (VG_(strstr)(file_buf, search_MIPS_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_MIPS;
   else if (VG_(strstr)(file_buf, search_Ingenic_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_INGENIC_E1;
   else if (VG_(strstr)(file_buf, search_Loongson_str) != NULL)
      vai.hwcaps = (VEX_PRID_COMP_LEGACY | VEX_PRID_IMP_LOONGSON_64);
   else {
      /* Did not find string in the proc file. */
      vai.hwcaps = 0;
      VG_(free)(file_buf);
      return False;
   }

   isa = VG_(strstr)(file_buf, "isa\t\t\t: ");

   if (NULL != isa) {
      if (VG_(strstr) (isa, "mips32r1") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
      if (VG_(strstr) (isa, "mips32r2") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
      if (VG_(strstr) (isa, "mips32r6") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R6;
      if (VG_(strstr) (isa, "mips64r1") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R1;
      if (VG_(strstr) (isa, "mips64r2") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2;
      if (VG_(strstr) (isa, "mips64r6") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R6;

      /*
       * TODO(petarj): Remove this Cavium workaround once Linux kernel folks
       * decide to change incorrect settings in
       * mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h.
       * The current settings show mips32r1, mips32r2 and mips64r1 as
       * unsupported ISAs by Cavium MIPS CPUs.
       */
      if (VEX_MIPS_COMP_ID(vai.hwcaps) == VEX_PRID_COMP_CAVIUM) {
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1 | VEX_MIPS_CPU_ISA_M32R2 |
                       VEX_MIPS_CPU_ISA_M64R1;
      }
   } else {
      /*
       * Kernel does not provide information about supported ISAs.
       * Populate the isa level flags based on the CPU model. That is our
       * best guess.
       */
      switch (VEX_MIPS_COMP_ID(vai.hwcaps)) {
         case VEX_PRID_COMP_CAVIUM:
         case VEX_PRID_COMP_NETLOGIC:
            vai.hwcaps |= (VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1);
            /* fallthrough */
         case VEX_PRID_COMP_INGENIC_E1:
         case VEX_PRID_COMP_MIPS:
            vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
            /* fallthrough */
         case VEX_PRID_COMP_BROADCOM:
            vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
            break;
         case VEX_PRID_COMP_LEGACY:
            if ((VEX_MIPS_PROC_ID(vai.hwcaps) == VEX_PRID_IMP_LOONGSON_64))
               vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1 |
                             VEX_MIPS_CPU_ISA_M32R2 | VEX_MIPS_CPU_ISA_M32R1;
            break;
         default:
            break;
      }
   }

   VG_(free)(file_buf);
   return True;
}

#endif /* defined(VGA_mips32) || defined(VGA_mips64) */
#if defined(VGP_arm64_linux)

/* Check to see whether we are running on a Cavium core, and if so auto-enable
   the fallback LLSC implementation.  See #369459. */

static Bool VG_(parse_cpuinfo)(void)
{
   const char *search_Cavium_str = "CPU implementer\t: 0x43";

   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return False;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
      vai.arm64_requires_fallback_LLSC = True;

   VG_(free)(file_buf);
   return True;
}

#endif /* defined(VGP_arm64_linux) */
Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        sizes. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;
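     /* Note (added for clarity): cpuid leaf 0 returns the vendor string
        in EBX, EDX, ECX order, so the three copies above yield e.g.
        "GenuineIntel" or "AuthenticAMD", which the VG_(strcmp) checks
        below rely on. */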
     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do MMXEXT. */
     have_mmxext = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        /* Some older AMD processors support a sse1 subset (Integer SSE). */
        have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
     }

     /* Figure out if this is an AMD or Intel that can do LZCNT. */
     have_lzcnt = False;
     if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
          || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Intel processors don't define the mmxext extension, but since it
        is just a sse1 subset always define it when we have sse1. */
     if (have_sse1)
        have_mmxext = True;

     va = VexArchX86;
     vai.endness = VexEndnessLE;

     if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
        VG_(machine_x86_have_mxcsr) = 0;
     } else {
        vai.hwcaps = 0; /*baseline - no sse at all*/
        VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }
#elif defined(VGA_amd64)
   { Bool have_sse3, have_ssse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx, have_bmi, have_avx2;
     Bool have_rdtscp, have_rdrand, have_f16c;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     ULong xgetbv_0 = 0;
     HChar vstr[13];
     vstr[0] = 0;

     have_sse3 = have_ssse3 = have_cx8 = have_cx16
        = have_lzcnt = have_avx = have_bmi = have_avx2
        = have_rdtscp = have_rdrand = have_f16c = False;

     eax = ebx = ecx = edx = max_basic = max_extended = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     max_basic = eax;
     if (max_basic < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        sizes. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3  = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     have_ssse3 = (ecx & (1<<9)) != 0;  /* True => have Sup SSE3 insns */
     // osxsave is ecx:27
     have_f16c   = (ecx & (1<<29)) != 0; /* True => have F16C insns */
     have_rdrand = (ecx & (1<<30)) != 0; /* True => have RDRAND insns */

     /* have_fma = False; */
     if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
        /* Processor supports AVX instructions and XGETBV is enabled
           by OS and AVX instructions are enabled by the OS. */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx","rax");
        xgetbv_0 = w;
        if ((xgetbv_0 & 7) == 7) {
           /* Only say we have AVX if the XSAVE-allowable
              bitfield-mask allows x87, SSE and AVX state.  We could
              actually run with a more restrictive XGETBV(0) value,
              but VEX's implementation of XSAVE and XRSTOR assumes
              that all 3 bits are enabled.

              Also, the VEX implementation of XSAVE/XRSTOR assumes that
              state component [2] (the YMM high halves) are located in
              the XSAVE image at offsets 576 .. 831.  So we have to
              check that here before declaring AVX to be supported. */
           UInt eax2, ebx2, ecx2, edx2;
           VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2);
           if (ebx2 == 576 && eax2 == 256) {
              have_avx = True;
              /* have_fma = (ecx & (1<<12)) != 0; */
              /* have_fma: Probably correct, but gcc complains due to
                 unrecognised opcode. */
           }
        }
     }

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this CPU can do LZCNT. */
     have_lzcnt = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Can we do RDTSCP? */
     have_rdtscp = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSVCP */
     }

     /* Check for BMI1 and AVX2.  If we have AVX1 (plus OS support). */
     if (have_avx && max_basic >= 7) {
        VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
        have_bmi  = (ebx & (1<<3)) != 0; /* True => have BMI1 */
        have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
     }

     /* Sanity check for RDRAND and F16C.  These don't actually *need* AVX, but
        it's convenient to restrict them to the AVX case since the simulated
        CPUID we'll offer them on has AVX as a base. */
     if (!have_avx) {
        have_f16c   = False;
        have_rdrand = False;
     }

     va = VexArchAMD64;
     vai.endness = VexEndnessLE;
     vai.hwcaps  = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
                 | (have_ssse3  ? VEX_HWCAPS_AMD64_SSSE3  : 0)
                 | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
                 | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
                 | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
                 | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
                 | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
                 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0)
                 | (have_f16c   ? VEX_HWCAPS_AMD64_F16C   : 0)
                 | (have_rdrand ? VEX_HWCAPS_AMD64_RDRAND : 0);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }
#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07, have_isa_3_0;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity checks. */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* Check for ISA 3.0 support. */
     have_isa_3_0 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_3_0 = False;
     } else {
        __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07, (Int)have_isa_3_0);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;
     vai.endness = VexEndnessBE;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;
     if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA3_0;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }
#elif defined(VGA_ppc64be)|| defined(VGA_ppc64le)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07, have_isa_3_0;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity checks. */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* Check for ISA 3.0 support. */
     have_isa_3_0 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_3_0 = False;
     } else {
        __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07, (Int)have_isa_3_0);
     /* on ppc64be, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;
#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;
     if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_0;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }
#elif defined(VGA_s390x)

#  include "libvex_s390x_common.h"

   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_STFLE;
     Int i, r, model;

     /* If the model is "unknown" don't treat this as an error. Assume
        this is a brand-new machine model for which we don't have the
        identification yet. Keeping fingers crossed. */
     model = VG_(get_machine_model)();

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
        /* BASR loads the address of the next insn into r1. Needed to avoid
           a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     /* Check availability of STFLE. If available store facility bits
        in hoststfle. */
     ULong hoststfle[S390_NUM_FACILITY_DW];

     for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
        hoststfle[i] = 0;

     have_STFLE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
        register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;

        __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                             : "=m" (hoststfle), "+d"(reg0)
                             : : "cc", "memory");
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;
     vai.endness = VexEndnessBE;

     vai.hwcaps = model;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }

     /* Detect presence of certain facilities using the STFLE insn.
        Note, that these facilities were introduced at the same time or later
        as STFLE, so the absence of STLFE implies the absence of the facility
        we're trying to detect. */
     struct fac_hwcaps_map {
        UInt installed;
        UInt facility_bit;
        UInt hwcaps_bit;
        const HChar name[6];   // may need adjustment for new facility names
     } fac_hwcaps[] = {
        { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM"  },
        { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE"   },
        { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP"   },
        { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX"   },
        { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2"  },
        { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3"  },
        { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
        { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
        { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC"   },
        { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO"  },
        { False, S390_FAC_VX,    VEX_HWCAPS_S390X_VX,    "VX"    },
        { False, S390_FAC_MSA5,  VEX_HWCAPS_S390X_MSA5,  "MSA5"  },
        { False, S390_FAC_MI2,   VEX_HWCAPS_S390X_MI2,   "MI2"   },
        { False, S390_FAC_LSC2,  VEX_HWCAPS_S390X_LSC2,  "LSC2"  },
     };

     /* Set hwcaps according to the detected facilities */
     UChar dw_number = 0;
     UChar fac_bit = 0;
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        vg_assert(fac_hwcaps[i].facility_bit <= 191);  // for now
        dw_number = fac_hwcaps[i].facility_bit / 64;
        fac_bit   = fac_hwcaps[i].facility_bit % 64;
        if (hoststfle[dw_number] & (1ULL << (63 - fac_bit))) {
           fac_hwcaps[i].installed = True;
           vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
        }
     }
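
     /* Worked example (added; not in the original sources): facility bits
        are numbered big-endian within each STFLE doubleword.  Assuming
        S390_FAC_VX is facility number 129 (the z13 vector facility), the
        loop above computes dw_number = 129/64 = 2 and fac_bit = 129%64 = 1,
        and then tests bit (63 - 1) = 62 of hoststfle[2]. */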
     /* Build up a string showing the probed-for facilities */
     HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
                   (sizeof fac_hwcaps[0].name + 3) +        //  %s %d
                   7 + 1 + 4 + 2    // machine %4d
                   + 1];            // '\0'
     HChar *p = fac_str;
     p += VG_(sprintf)(p, "machine %4d  ", model);
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name,
                          fac_hwcaps[i].installed);
     }
     *p++ = '\0';

     VG_(debugLog)(1, "machine", "%s\n", fac_str);
     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }
#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON, have_V8;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity checks. */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generation of VFP extension but they differs very
        little so for now we will not distinguish them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     /* ARMv8 insns */
     have_V8 = True;
     if (archlevel == 7) {
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_V8 = False;
        } else {
           __asm__ __volatile__(".word 0xF3044F54"); /* VMAXNM.F32 q2,q2,q2 */
        }
        if (have_V8 && have_NEON && have_VFP3) {
           archlevel = 8;
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
                   archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
                   (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;
     vai.endness = VexEndnessLE;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }
#elif defined(VGA_arm64)
   {
     va = VexArchARM64;
     vai.endness = VexEndnessLE;

     /* So far there are no variants. */
     vai.hwcaps = 0;

     VG_(machine_get_cache_info)(&vai);

     /* Check whether we need to use the fallback LLSC implementation.
        If the check fails, give up. */
     if (! VG_(parse_cpuinfo)())
        return False;

     /* 0 denotes 'not set'.  The range of legitimate values here,
        after being set that is, is 2 though 17 inclusive. */
     vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
     vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
     ULong ctr_el0;
     __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
     vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
     vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
     VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
                      "ctr_el0.iMinLine_szB = %d\n",
                   1 << vai.arm64_dMinLine_lg2_szB,
                   1 << vai.arm64_iMinLine_lg2_szB);
     VG_(debugLog)(1, "machine", "ARM64: requires_fallback_LLSC: %s\n",
                   vai.arm64_requires_fallback_LLSC ? "yes" : "no");

     return True;
   }
1737 #elif defined(VGA_mips32)
1739 /* Define the position of F64 bit in FIR register. */
1742 if (!VG_(parse_cpuinfo
)())
1745 # if defined(VKI_LITTLE_ENDIAN)
1746 vai
.endness
= VexEndnessLE
;
1747 # elif defined(VKI_BIG_ENDIAN)
1748 vai
.endness
= VexEndnessBE
;
1750 vai
.endness
= VexEndness_INVALID
;
1753 /* Same instruction set detection algorithm as for ppc32/arm... */
1754 vki_sigset_t saved_set
, tmp_set
;
1755 vki_sigaction_fromK_t saved_sigill_act
;
1756 vki_sigaction_toK_t tmp_sigill_act
;
1758 volatile Bool have_DSP
, have_DSPr2
, have_MSA
;
1761 vg_assert(sizeof(vki_sigaction_fromK_t
) == sizeof(vki_sigaction_toK_t
));
1763 VG_(sigemptyset
)(&tmp_set
);
1764 VG_(sigaddset
)(&tmp_set
, VKI_SIGILL
);
1766 r
= VG_(sigprocmask
)(VKI_SIG_UNBLOCK
, &tmp_set
, &saved_set
);
1769 r
= VG_(sigaction
)(VKI_SIGILL
, NULL
, &saved_sigill_act
);
1771 tmp_sigill_act
= saved_sigill_act
;
1773 /* NODEFER: signal handler does not return (from the kernel's point of
1774 view), hence if it is to successfully catch a signal more than once,
1775 we need the NODEFER flag. */
1776 tmp_sigill_act
.sa_flags
&= ~VKI_SA_RESETHAND
;
1777 tmp_sigill_act
.sa_flags
&= ~VKI_SA_SIGINFO
;
1778 tmp_sigill_act
.sa_flags
|= VKI_SA_NODEFER
;
1779 tmp_sigill_act
.ksa_handler
= handler_unsup_insn
;
1780 VG_(sigaction
)(VKI_SIGILL
, &tmp_sigill_act
, NULL
);
     if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {

        /* MSA instructions. */
        have_MSA = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_MSA = False;
        } else {
           __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
        }
        if (have_MSA) {
           vai.hwcaps |= VEX_PRID_IMP_P5600;
        } else {
           /* DSPr2 instructions. */
           have_DSPr2 = True;
           if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
              have_DSPr2 = False;
           } else {
              __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
           }
           if (have_DSPr2) {
              /* We assume it's 74K, since it can run DSPr2. */
              vai.hwcaps |= VEX_PRID_IMP_74K;
           } else {
              /* DSP instructions. */
              have_DSP = True;
              if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
                 have_DSP = False;
              } else {
                 __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
              }
              if (have_DSP) {
                 /* We assume it's 34K, since it has support for DSP. */
                 vai.hwcaps |= VEX_PRID_IMP_34K;
              }
           }
        }
     }
#    if defined(VGP_mips32_linux)
     Int fpmode = VG_(prctl)(VKI_PR_GET_FP_MODE, 0, 0, 0, 0);
#    else
     Int fpmode = -1;
#    endif

     if (fpmode < 0) {
        /* prctl(PR_GET_FP_MODE) is not supported by Kernel,
           we are using alternative way to determine FP mode */
        ULong result = 0;

        if (!VG_MINIMAL_SETJMP(env_unsup_insn)) {
           __asm__ volatile (
              ".set push\n\t"
              ".set noreorder\n\t"
              ".set oddspreg\n\t"
              ".set hardfloat\n\t"
              "lui $t0, 0x3FF0\n\t"
              "ldc1 $f0, %0\n\t"
              "mtc1 $t0, $f1\n\t"
              "sdc1 $f0, %0\n\t"
              ".set pop\n\t"
              : "+m"(result)
              :
              : "t0", "$f0", "$f1", "memory");

           fpmode = (result != 0x3FF0000000000000ull);
        }
     }
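     /* Explanatory note on the probe above: it pushes the bit pattern of
        1.0 (0x3FF0000000000000) through the FPU via the odd-numbered
        register $f1.  In FR=0 mode $f0/$f1 form one 64-bit pair, so the
        pattern survives the round trip through memory and fpmode ends up
        0; in FR=1 mode $f1 is an independent 64-bit register, the stored
        value differs, and fpmode becomes 1 (64-bit FP register mode). */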
     if (fpmode != 0)
        vai.hwcaps |= VEX_MIPS_HOST_FR;

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
     VG_(machine_get_cache_info)(&vai);
#elif defined(VGA_mips64)
     va = VexArchMIPS64;
     if (!VG_(parse_cpuinfo)())
        return False;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps |= VEX_MIPS_HOST_FR;

     /* Same instruction set detection algorithm as for ppc32/arm... */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t   tmp_sigill_act;

     volatile Bool have_MSA;
     Int r;

     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {

        /* MSA instructions */
        have_MSA = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_MSA = False;
        } else {
           __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
        }
        if (have_MSA) {
           vai.hwcaps |= VEX_PRID_IMP_P5600;
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);
#elif defined(VGP_nanomips_linux)
     va = VexArchNANOMIPS;
     vai.hwcaps = 0;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
     VG_(machine_get_cache_info)(&vai);

#else
#  error "Unknown arch"
#endif
}
/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif
/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif
/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif
/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}
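/* Illustrative (hypothetical) call site, not from this file:

      VexArch     host_arch;
      VexArchInfo host_archinfo;
      VG_(machine_get_VexArchInfo)( &host_arch, &host_archinfo );

   Callers may only do this after the hwcaps detection pass has
   completed, since the vg_assert on hwcaps_done above fires
   otherwise. */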
/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   what the minimum cache line size they are prepared to simulate is.
   Basically require that the minimum cache line size is at least as
   large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines. */
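/* Worked example (explanatory, not part of the original comment): with
   32-byte registers (amd64 + AVX) a simulated cache line must be at
   least 32 bytes; then even a maximally misaligned 32-byte load or
   store touches at most two lines.  With, say, 16-byte lines the same
   access could span three lines, which the cache simulators do not
   want to model. */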
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_arm64)
   /* ARM64 always has Neon, AFAICS. */
   return 16;

#  elif defined(VGA_mips32) || defined(VGP_nanomips_linux)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  elif defined(VGA_mips64)
   return 8;

#  else
#    error "Unknown arch"
#  endif
}
// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)       \
      || defined(VGP_arm_linux) || defined(VGO_darwin)          \
      || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux)  \
      || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)  \
      || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris) \
      || defined(VGP_nanomips_linux)
   return f;
#  elif defined(VGP_ppc64be_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}
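/* Illustrative use (hypothetical call site, not from this file): code
   that wants the real entry address of a core function should strip
   any descriptor indirection first, e.g.

      void* entry = VG_(fnptr_to_fnentry)( (void*)&VG_(get_IP) );

   On most platforms this is the identity; on ppc64be-linux it returns
   the first word of the 3-word function descriptor instead. */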
/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/