/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2017 Julian Seward

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/
#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcproc.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"
#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

#define STACK_PTR_S1(regs) ((regs).vex_shadow1.VG_STACK_PTR)
Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

Addr VG_(get_SP_s1) ( ThreadId tid ) {
   return STACK_PTR_S1( VG_(threads)[tid].arch );
}
void VG_(set_SP_s1) ( ThreadId tid, Addr sp ) {
   STACK_PTR_S1( VG_(threads)[tid].arch ) = sp;
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}
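
/* Note on the accessors above: VG_INSTR_PTR, VG_STACK_PTR and
   VG_FRAME_PTR are per-arch guest register names supplied by
   pub_core_machine.h (on amd64, for example, VG_INSTR_PTR maps to
   guest_RIP), so these functions compile down to plain guest-state
   field reads and writes.  Usage sketch (illustrative only):

      Addr ip = VG_(get_IP)(tid);   // current guest program counter
      VG_(set_IP)(tid, ip);         // write it back unchanged
*/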
void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_FP;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
   /* ANDREAS 3 Apr 2019 FIXME r_f0..r_f7: is this correct? */
   regs->misc.S390X.r_f0
      = VG_(threads)[tid].arch.vex.guest_v0.w64[0];
   regs->misc.S390X.r_f1
      = VG_(threads)[tid].arch.vex.guest_v1.w64[0];
   regs->misc.S390X.r_f2
      = VG_(threads)[tid].arch.vex.guest_v2.w64[0];
   regs->misc.S390X.r_f3
      = VG_(threads)[tid].arch.vex.guest_v3.w64[0];
   regs->misc.S390X.r_f4
      = VG_(threads)[tid].arch.vex.guest_v4.w64[0];
   regs->misc.S390X.r_f5
      = VG_(threads)[tid].arch.vex.guest_v5.w64[0];
   regs->misc.S390X.r_f6
      = VG_(threads)[tid].arch.vex.guest_v6.w64[0];
   regs->misc.S390X.r_f7
      = VG_(threads)[tid].arch.vex.guest_v7.w64[0];
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}
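
/* Usage note (sketch): callers such as the stack unwinder seed an
   unwind from these values, along the lines of

      UnwindStartRegs srs;
      VG_(memset)(&srs, 0, sizeof srs);
      VG_(get_UnwindStartRegs)(&srs, tid);

   Zero-initialising first matters because not every arch branch above
   writes every field of the misc union. */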
void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}
void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}
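
/* Usage sketch for the two functions above (values illustrative): to
   read thread tid's shadow-1 copy of the first guest word into a
   buffer, and later write a possibly modified copy back:

      UChar buf[sizeof(UWord)];
      VG_(get_shadow_regs_area)( tid, buf, 1, 0, sizeof buf );
      ...
      VG_(set_shadow_regs_area)( tid, 1, 0, sizeof buf, buf );

   'offset' is a byte offset into the VexGuestArchState, so callers
   normally form it from guest-state offsets rather than writing a
   literal. */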
static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, UWord))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32) || defined(VGA_mips64)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#elif defined(VGA_arm64)
   (*f)(tid, "x0" , vex->guest_X0 );
   (*f)(tid, "x1" , vex->guest_X1 );
   (*f)(tid, "x2" , vex->guest_X2 );
   (*f)(tid, "x3" , vex->guest_X3 );
   (*f)(tid, "x4" , vex->guest_X4 );
   (*f)(tid, "x5" , vex->guest_X5 );
   (*f)(tid, "x6" , vex->guest_X6 );
   (*f)(tid, "x7" , vex->guest_X7 );
   (*f)(tid, "x8" , vex->guest_X8 );
   (*f)(tid, "x9" , vex->guest_X9 );
   (*f)(tid, "x10", vex->guest_X10);
   (*f)(tid, "x11", vex->guest_X11);
   (*f)(tid, "x12", vex->guest_X12);
   (*f)(tid, "x13", vex->guest_X13);
   (*f)(tid, "x14", vex->guest_X14);
   (*f)(tid, "x15", vex->guest_X15);
   (*f)(tid, "x16", vex->guest_X16);
   (*f)(tid, "x17", vex->guest_X17);
   (*f)(tid, "x18", vex->guest_X18);
   (*f)(tid, "x19", vex->guest_X19);
   (*f)(tid, "x20", vex->guest_X20);
   (*f)(tid, "x21", vex->guest_X21);
   (*f)(tid, "x22", vex->guest_X22);
   (*f)(tid, "x23", vex->guest_X23);
   (*f)(tid, "x24", vex->guest_X24);
   (*f)(tid, "x25", vex->guest_X25);
   (*f)(tid, "x26", vex->guest_X26);
   (*f)(tid, "x27", vex->guest_X27);
   (*f)(tid, "x28", vex->guest_X28);
   (*f)(tid, "x29", vex->guest_X29);
   (*f)(tid, "x30", vex->guest_X30);
#else
#  error "Unknown arch"
#endif
}
void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread, or thread instructed to die by another thread
         // that called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}
void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_byte;
         return True;
      }
   }
   return False;
}
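
/* Iteration idiom for the pair above (sketch):

      ThreadId tid;
      Addr stack_min, stack_max;
      VG_(thread_stack_reset_iter)(&tid);
      while ( VG_(thread_stack_next)(&tid, &stack_min, &stack_max) ) {
         // [stack_min, stack_max] bounds a live thread's client stack
      }
*/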
Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_byte;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}
//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif
/* For hwcaps detection on ppc32/64, s390x, arm and mips we'll need to
   do SIGILL testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32) \
    || defined(VGA_mips64)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
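
/* All the instruction probes below share one shape (sketch):

      have_FOO = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_FOO = False;    // insn raised SIGILL; handler longjmp'd here
      } else {
         __asm__ __volatile__(...);   // candidate instruction
      }

   which is why handler_unsup_insn must be installed, and the signal
   unblocked, before any probe runs. */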
/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
      block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64
             || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64
                || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */
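
/* Reading find_ppc_dcbz_sz above: the test block is pre-filled with
   0xff, the probe instruction runs on an aligned pointer into it, and
   the effective size is recovered by counting the bytes that became
   zero -- e.g. if exactly 32 bytes are cleared, dcbz_szB ends up 32. */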
#if defined(VGA_s390x)

/* Read /proc/cpuinfo.  Look for lines like these

      processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model.  If the machine model could not be
   determined or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      const HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
      { "2828", VEX_S390X_MODEL_ZBC12 },
      { "2964", VEX_S390X_MODEL_Z13 },
      { "2965", VEX_S390X_MODEL_Z13S },
      { "3906", VEX_S390X_MODEL_Z14 },
      { "3907", VEX_S390X_MODEL_Z14_ZR1 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in the /proc file system implementation:
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
      if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

      m = VG_(strstr)( p, "machine" );
      if (m == NULL) continue;

      p = m + sizeof "machine" - 1;
      while ( VG_(isspace)( *p ) || *p == '=') {
         if (*p == '\n') goto next_line;
         ++p;
      }

      model_name = p;
      for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
         struct model_map *mm = model_map + n;
         SizeT len = VG_(strlen)( mm->name );
         if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
              VG_(isspace)( model_name[len] )) {
            if (mm->id < model) model = mm->id;
            p = model_name + len;
            break;
         }
      }
      /* Skip until end-of-line */
      while (*p != '\n')
         ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* defined(VGA_s390x) */
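
/* Matching example for VG_(get_machine_model) above: a line containing
   "machine = 2964" maps to VEX_S390X_MODEL_Z13 via the table.  When
   several processor lines report different machines, the smallest model
   id (the oldest model) is kept, which is the conservative choice. */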
#if defined(VGA_mips32) || defined(VGA_mips64)

/*
 * Initialize hwcaps by parsing /proc/cpuinfo.  Returns False if it cannot
 * determine what CPU it is (it searches only for the models that are or
 * may be supported by Valgrind).
 */
static Bool VG_(parse_cpuinfo)(void)
{
   const char *search_Broadcom_str = "cpu model\t\t: Broadcom";
   const char *search_Cavium_str   = "cpu model\t\t: Cavium";
   const char *search_Ingenic_str  = "cpu model\t\t: Ingenic";
   const char *search_Loongson_str = "cpu model\t\t: ICT Loongson";
   const char *search_MIPS_str     = "cpu model\t\t: MIPS";
   const char *search_Netlogic_str = "cpu model\t\t: Netlogic";

   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf, *isa;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return False;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in the /proc file system implementation:
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;  /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr)(file_buf, search_Broadcom_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_BROADCOM;
   else if (VG_(strstr)(file_buf, search_Netlogic_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_NETLOGIC;
   else if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_CAVIUM;
   else if (VG_(strstr)(file_buf, search_MIPS_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_MIPS;
   else if (VG_(strstr)(file_buf, search_Ingenic_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_INGENIC_E1;
   else if (VG_(strstr)(file_buf, search_Loongson_str) != NULL)
      vai.hwcaps = (VEX_PRID_COMP_LEGACY | VEX_PRID_IMP_LOONGSON_64);
   else {
      /* Did not find string in the proc file. */
      vai.hwcaps = 0;
      VG_(free)(file_buf);
      return False;
   }

   isa = VG_(strstr)(file_buf, "isa\t\t\t: ");

   if (NULL != isa) {
      if (VG_(strstr) (isa, "mips32r1") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
      if (VG_(strstr) (isa, "mips32r2") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
      if (VG_(strstr) (isa, "mips32r6") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R6;
      if (VG_(strstr) (isa, "mips64r1") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R1;
      if (VG_(strstr) (isa, "mips64r2") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2;
      if (VG_(strstr) (isa, "mips64r6") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R6;

      /*
       * TODO(petarj): Remove this Cavium workaround once Linux kernel folks
       * decide to change incorrect settings in
       * mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h.
       * The current settings show mips32r1, mips32r2 and mips64r1 as
       * unsupported ISAs by Cavium MIPS CPUs.
       */
      if (VEX_MIPS_COMP_ID(vai.hwcaps) == VEX_PRID_COMP_CAVIUM) {
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1 | VEX_MIPS_CPU_ISA_M32R2 |
                       VEX_MIPS_CPU_ISA_M64R1;
      }
   } else {
      /*
       * Kernel does not provide information about supported ISAs.
       * Populate the isa level flags based on the CPU model.  That is
       * our best guess.
       */
      switch (VEX_MIPS_COMP_ID(vai.hwcaps)) {
         case VEX_PRID_COMP_CAVIUM:
         case VEX_PRID_COMP_NETLOGIC:
            vai.hwcaps |= (VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1);
            /* fall through */
         case VEX_PRID_COMP_INGENIC_E1:
         case VEX_PRID_COMP_MIPS:
            vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
            /* fall through */
         case VEX_PRID_COMP_BROADCOM:
            vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
            break;
         case VEX_PRID_COMP_LEGACY:
            if (VEX_MIPS_PROC_ID(vai.hwcaps) == VEX_PRID_IMP_LOONGSON_64)
               vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1 |
                             VEX_MIPS_CPU_ISA_M32R2 | VEX_MIPS_CPU_ISA_M32R1;
            break;
         default:
            break;
      }
   }

   VG_(free)(file_buf);
   return True;
}

#endif /* defined(VGA_mips32) || defined(VGA_mips64) */
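
/* Note on the MIPS hwcaps encoding used above: the company id taken
   from the "cpu model" line occupies the PRID-style high bits, and the
   VEX_MIPS_CPU_ISA_* level flags are OR'd in beside it.  The case
   fall-throughs in the switch are deliberate: each model also picks up
   every ISA level below its own. */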
#if defined(VGP_arm64_linux)

/* Check to see whether we are running on a Cavium core, and if so
   auto-enable the fallback LLSC implementation.  See #369459. */

static Bool VG_(parse_cpuinfo)(void)
{
   const char *search_Cavium_str = "CPU implementer\t: 0x43";

   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return False;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in the /proc file system implementation:
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;  /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
      vai.arm64_requires_fallback_LLSC = True;

   VG_(free)(file_buf);
   return True;
}

#endif /* defined(VGP_arm64_linux) */
Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;
     /* The three copies above assemble the vendor string in the
        EBX,EDX,ECX order the CPU returns it, e.g. "GenuineIntel" or
        "AuthenticAMD"; the VG_(strcmp) tests below rely on that. */

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
     have_sse3 = (ecx & (1<<0))  != 0; /* True => have sse3 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do MMXEXT. */
     have_mmxext = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        /* Some older AMD processors support a sse1 subset (Integer SSE). */
        have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
     }

     /* Figure out if this is an AMD or Intel that can do LZCNT. */
     have_lzcnt = False;
     if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
          || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Intel processors don't define the mmxext extension, but since it
        is just a sse1 subset, always define it when we have sse1. */
     if (have_sse1)
        have_mmxext = True;

     va          = VexArchX86;
     vai.endness = VexEndnessLE;

     if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
        VG_(machine_x86_have_mxcsr) = 0;
     } else {
        vai.hwcaps = 0; /*baseline - no sse at all*/
        VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }
#elif defined(VGA_amd64)
   { Bool have_sse3, have_ssse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx, have_bmi, have_avx2;
     Bool have_rdtscp, have_rdrand, have_f16c;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     ULong xgetbv_0 = 0;
     HChar vstr[13];
     vstr[0] = 0;

     have_sse3 = have_ssse3 = have_cx8 = have_cx16
        = have_lzcnt = have_avx = have_bmi = have_avx2
        = have_rdtscp = have_rdrand = have_f16c = False;

     eax = ebx = ecx = edx = max_basic = max_extended = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     max_basic = eax;
     if (max_basic < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3  = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     have_ssse3 = (ecx & (1<<9)) != 0;  /* True => have Supplemental
                                           SSE3 (SSSE3) insns */
     // xsave   is ecx:26
     // osxsave is ecx:27
     // avx     is ecx:28
     have_f16c   = (ecx & (1<<29)) != 0; /* True => have F16C insns */
     have_rdrand = (ecx & (1<<30)) != 0; /* True => have RDRAND insns */

     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
        /* Processor supports AVX instructions, XGETBV is enabled
           by the OS, and AVX instructions are enabled by the OS. */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx","rax");
        xgetbv_0 = w;
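        /* The hand-encoded XGETBV above reads XCR0 (ECX=0): bit 0 is
           the x87 state-enable, bit 1 SSE, bit 2 AVX.  Hence the
           "(xgetbv_0 & 7) == 7" test below insists the OS enables all
           three. */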
        if ((xgetbv_0 & 7) == 7) {
           /* Only say we have AVX if the XSAVE-allowable
              bitfield-mask allows x87, SSE and AVX state.  We could
              actually run with a more restrictive XGETBV(0) value,
              but VEX's implementation of XSAVE and XRSTOR assumes
              that all 3 bits are enabled.

              Also, the VEX implementation of XSAVE/XRSTOR assumes that
              state component [2] (the YMM high halves) is located in
              the XSAVE image at offsets 576 .. 831.  So we have to
              check that here before declaring AVX to be supported. */
           UInt eax2, ebx2, ecx2, edx2;
           VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2);
           if (ebx2 == 576 && eax2 == 256) {
              have_avx = True;
           }
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this CPU can do LZCNT. */
     have_lzcnt = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Can we do RDTSCP? */
     have_rdtscp = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
     }

     /* Check for BMI1 and AVX2, if we have AVX1 (plus OS support). */
     have_bmi  = False;
     have_avx2 = False;
     if (have_avx && max_basic >= 7) {
        VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
        have_bmi  = (ebx & (1<<3)) != 0; /* True => have BMI1 */
        have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
     }

     /* Sanity check for RDRAND and F16C.  These don't actually *need* AVX,
        but it's convenient to restrict them to the AVX case since the
        simulated CPUID we'll offer them on has AVX as a base. */
     if (!have_avx) {
        have_f16c   = False;
        have_rdrand = False;
     }

     va          = VexArchAMD64;
     vai.endness = VexEndnessLE;
     vai.hwcaps  = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
                 | (have_ssse3  ? VEX_HWCAPS_AMD64_SSSE3  : 0)
                 | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
                 | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
                 | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
                 | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
                 | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
                 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0)
                 | (have_f16c   ? VEX_HWCAPS_AMD64_F16C   : 0)
                 | (have_rdrand ? VEX_HWCAPS_AMD64_RDRAND : 0);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }
#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE.  An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07, have_isa_3_0;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* Check for ISA 3.0 support. */
     have_isa_3_0 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_3_0 = False;
     } else {
        __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine",
                   "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
                   (Int)have_F, (Int)have_V, (Int)have_FX,
                   (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                   (Int)have_isa_2_07, (Int)have_isa_3_0);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va          = VexArchPPC32;
     vai.endness = VexEndnessBE;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;
     if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA3_0;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }
#elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07, have_isa_3_0;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* Check for ISA 3.0 support. */
     have_isa_3_0 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_3_0 = False;
     } else {
        __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine",
                   "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
                   (Int)have_F, (Int)have_V, (Int)have_FX,
                   (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                   (Int)have_isa_2_07, (Int)have_isa_3_0);
     /* on ppc64be, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;
#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;
     if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_0;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }
#elif defined(VGA_s390x)

#  include "libvex_s390x_common.h"

   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_STFLE;
     Int i, r, model;

     /* If the model is "unknown" don't treat this as an error.  Assume
        this is a brand-new machine model for which we don't have the
        identification yet.  Keeping fingers crossed. */
     model = VG_(get_machine_model)();

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps.  Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
        /* BASR loads the address of the next insn into r1.  Needed to
           avoid a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     /* Check availability of STFLE.  If available store facility bits
        in hoststfle. */
     ULong hoststfle[S390_NUM_FACILITY_DW];

     for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
        hoststfle[i] = 0;

     have_STFLE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
        register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;

        __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                             : "=m" (hoststfle), "+d"(reg0)
                             : : "cc", "memory");
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;
     vai.endness = VexEndnessBE;

     vai.hwcaps = model;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990.  For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }

     /* Detect presence of certain facilities using the STFLE insn.
        Note that these facilities were introduced at the same time as or
        later than STFLE, so the absence of STFLE implies the absence of
        any facility we're trying to detect. */
     struct fac_hwcaps_map {
        Bool installed;
        Int  facility_bit;
        UInt hwcaps_bit;
        const HChar name[6];   // may need adjustment for new facility names
     } fac_hwcaps[] = {
        { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM"  },
        { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE"   },
        { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP"   },
        { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX"   },
        { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2"  },
        { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3"  },
        { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
        { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
        { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC"   },
        { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO"  },
        { False, S390_FAC_VX,    VEX_HWCAPS_S390X_VX,    "VX"    },
        { False, S390_FAC_MSA5,  VEX_HWCAPS_S390X_MSA5,  "MSA5"  },
        { False, S390_FAC_MI2,   VEX_HWCAPS_S390X_MI2,   "MI2"   },
     };

     /* Set hwcaps according to the detected facilities */
     UChar dw_number = 0;
     UChar fac_bit = 0;
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        vg_assert(fac_hwcaps[i].facility_bit <= 191);  // for now
        dw_number = fac_hwcaps[i].facility_bit / 64;
        fac_bit   = fac_hwcaps[i].facility_bit % 64;
        if (hoststfle[dw_number] & (1ULL << (63 - fac_bit))) {
           fac_hwcaps[i].installed = True;
           vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
        }
     }
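
     /* Worked example for the loop above: S390_FAC_DFP is architecturally
        facility bit 42, giving dw_number = 0 and fac_bit = 42.  STFLE
        numbers facility bits from the most significant bit of each
        doubleword downwards, which is exactly what the
        (1ULL << (63 - fac_bit)) mask encodes. */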
     /* Build up a string showing the probed-for facilities */
     HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
                   (sizeof fac_hwcaps[0].name + 3) +   //  %s %d
                   7 + 1 + 4 + 2    // machine %4d
                   + 1];            // '\0'
     HChar *p = fac_str;
     p += VG_(sprintf)(p, "machine %4d  ", model);
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name,
                          fac_hwcaps[i].installed);
     }
     *p++ = '\0';

     VG_(debugLog)(1, "machine", "%s\n", fac_str);
     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }
#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON, have_V8;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they
        differ very little, so for now we will not distinguish between
        them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     /* ARMv8 insns */
     have_V8 = True;
     if (archlevel == 7) {
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_V8 = False;
        } else {
           __asm__ __volatile__(".word 0xF3044F54"); /* VMAXNM.F32 q2,q2,q2 */
        }
        if (have_V8 && have_NEON && have_VFP3) {
           archlevel = 8;
        }
     }
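
     /* Net effect of the ladder above: PLI exists only from ARMv7 and
        PKHBT only from ARMv6, so archlevel settles at 5, 6 or 7; it is
        promoted to 8 only when the VMAXNM probe succeeds and NEON and
        VFP3 are also present. */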
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
                   archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
                   (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va          = VexArchARM;
     vai.endness = VexEndnessLE;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }
#elif defined(VGA_arm64)
   {
     va          = VexArchARM64;
     vai.endness = VexEndnessLE;

     /* So far there are no variants. */
     vai.hwcaps = 0;

     VG_(machine_get_cache_info)(&vai);

     /* Check whether we need to use the fallback LLSC implementation.
        If the check fails, give up. */
     if (! VG_(parse_cpuinfo)())
        return False;

     /* 0 denotes 'not set'.  The range of legitimate values here,
        after being set that is, is 2 through 17 inclusive. */
     vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
     vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
     ULong ctr_el0;
     __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
     vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
     vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
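
     /* CTR_EL0 layout (architectural): IminLine is bits [3:0], DminLine
        is bits [19:16], each the log2 of the line size in 4-byte words;
        the "+ 2" converts that to the log2 size in bytes. */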
     VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
                   "ctr_el0.iMinLine_szB = %d\n",
                   1 << vai.arm64_dMinLine_lg2_szB,
                   1 << vai.arm64_iMinLine_lg2_szB);
     VG_(debugLog)(1, "machine", "ARM64: requires_fallback_LLSC: %s\n",
                   vai.arm64_requires_fallback_LLSC ? "yes" : "no");

     return True;
   }
1733 #elif defined(VGA_mips32)
1735 /* Define the position of F64 bit in FIR register. */
1738 if (!VG_(parse_cpuinfo
)())
1741 # if defined(VKI_LITTLE_ENDIAN)
1742 vai
.endness
= VexEndnessLE
;
1743 # elif defined(VKI_BIG_ENDIAN)
1744 vai
.endness
= VexEndnessBE
;
1746 vai
.endness
= VexEndness_INVALID
;
1749 /* Same instruction set detection algorithm as for ppc32/arm... */
1750 vki_sigset_t saved_set
, tmp_set
;
1751 vki_sigaction_fromK_t saved_sigill_act
;
1752 vki_sigaction_toK_t tmp_sigill_act
;
1754 volatile Bool have_DSP
, have_DSPr2
, have_MSA
;
1757 vg_assert(sizeof(vki_sigaction_fromK_t
) == sizeof(vki_sigaction_toK_t
));
1759 VG_(sigemptyset
)(&tmp_set
);
1760 VG_(sigaddset
)(&tmp_set
, VKI_SIGILL
);
1762 r
= VG_(sigprocmask
)(VKI_SIG_UNBLOCK
, &tmp_set
, &saved_set
);
1765 r
= VG_(sigaction
)(VKI_SIGILL
, NULL
, &saved_sigill_act
);
1767 tmp_sigill_act
= saved_sigill_act
;
1769 /* NODEFER: signal handler does not return (from the kernel's point of
1770 view), hence if it is to successfully catch a signal more than once,
1771 we need the NODEFER flag. */
1772 tmp_sigill_act
.sa_flags
&= ~VKI_SA_RESETHAND
;
1773 tmp_sigill_act
.sa_flags
&= ~VKI_SA_SIGINFO
;
1774 tmp_sigill_act
.sa_flags
|= VKI_SA_NODEFER
;
1775 tmp_sigill_act
.ksa_handler
= handler_unsup_insn
;
1776 VG_(sigaction
)(VKI_SIGILL
, &tmp_sigill_act
, NULL
);
     if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {

        /* MSA instructions. */
        have_MSA = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_MSA = False;
        } else {
           __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
        }
        if (have_MSA) {
           vai.hwcaps |= VEX_PRID_IMP_P5600;
        } else {
           /* DSPr2 instructions. */
           have_DSPr2 = True;
           if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
              have_DSPr2 = False;
           } else {
              __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
           }
           if (have_DSPr2) {
              /* We assume it's 74K, since it can run DSPr2. */
              vai.hwcaps |= VEX_PRID_IMP_74K;
           } else {
              /* DSP instructions. */
              have_DSP = True;
              if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
                 have_DSP = False;
              } else {
                 __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
              }
              if (have_DSP) {
                 /* We assume it's 34K, since it has support for DSP. */
                 vai.hwcaps |= VEX_PRID_IMP_34K;
              }
           }
        }
     }
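     /* Note on the probes above: the opcodes are emitted as raw .word
        values rather than mnemonics so the file still assembles on
        toolchains built without MSA/DSP support; the CPU decodes them
        exactly as the instructions named in the trailing comments. */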
#    if defined(VGP_mips32_linux)
     Int fpmode = VG_(prctl)(VKI_PR_GET_FP_MODE, 0, 0, 0, 0);
#    else
     Int fpmode = -1;
#    endif

     if (fpmode < 0) {
        /* prctl(PR_GET_FP_MODE) is not supported by the kernel; fall
           back on an alternative way to determine the FP mode. */
        ULong result = 0;

        if (!VG_MINIMAL_SETJMP(env_unsup_insn)) {
           __asm__ volatile (
              ".set push\n\t"
              ".set noreorder\n\t"
              ".set oddspreg\n\t"
              ".set hardfloat\n\t"
              "lui $t0, 0x3FF0\n\t"
              "ldc1 $f0, %0\n\t"
              "mtc1 $t0, $f1\n\t"
              "sdc1 $f0, %0\n\t"
              ".set pop\n\t"
              : "+m"(result)
              :
              : "t0", "$f0", "$f1", "memory");

           fpmode = (result != 0x3FF0000000000000ull);
        }
     }

     if (fpmode != 0)
        vai.hwcaps |= VEX_MIPS_HOST_FR;
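     /* How the fallback probe decides (illustrative walk-through): ldc1
        loads the 64-bit $f0 with 0, then mtc1 writes 0x3FF00000 into the
        odd register $f1.  In FR=0 mode $f0/$f1 are the two halves of one
        64-bit register, so sdc1 stores 0x3FF0000000000000 and fpmode
        ends up 0; in FR=1 mode $f1 is an independent register, $f0 is
        still 0, and fpmode becomes 1, enabling VEX_MIPS_HOST_FR. */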
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
     VG_(machine_get_cache_info)(&vai);

     return True;
   }
#elif defined(VGA_mips64)
   {
     va = VexArchMIPS64;
     if (!VG_(parse_cpuinfo)())
         return False;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps |= VEX_MIPS_HOST_FR;
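     /* Unlike the mips32 case above there is no FP-mode probe here: the
        64-bit MIPS ABIs assume the full set of 64-bit FP registers
        (FR=1), so VEX_MIPS_HOST_FR can be set unconditionally. */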
     /* Same instruction set detection algorithm as for ppc32/arm... */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t   tmp_sigill_act;

     volatile Bool have_MSA;
     Int r;

     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {

        /* MSA instructions. */
        have_MSA = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_MSA = False;
        } else {
           __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
        }
        if (have_MSA) {
           vai.hwcaps |= VEX_PRID_IMP_P5600;
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
     VG_(machine_get_cache_info)(&vai);

     return True;
   }
1930 # error "Unknown arch"
/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif
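/* Usage sketch (hypothetical value): startup code calls this once the
   host's icache line size is known, e.g.
      VG_(machine_ppc32_set_clszB)(128);
   on a core with 128-byte icache lines.  By construction of the asserts
   above, repeated calls with the same value are harmless. */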
/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc64be)|| defined(VGA_ppc64le)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif
/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif
/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}
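/* Usage sketch (illustrative): a caller that wants the host description
   can do
      VexArch     arch;
      VexArchInfo archinfo;
      VG_(machine_get_VexArchInfo)(&arch, &archinfo);
   after which archinfo.hwcaps and archinfo.endness describe the host.
   Either out-parameter may be NULL if only one half is wanted. */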
/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   what the minimum cache line size they are prepared to simulate is.
   Basically require that the minimum cache line size is at least as
   large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines.
*/
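/* Concrete instance of the straddle rule (illustrative): with 32-byte
   AVX registers and a simulated line size of at least 32, a 32-byte
   store can overlap at most two lines regardless of alignment.  With a
   16-byte line size, an unaligned 32-byte store (say at offset 15)
   would touch three lines, which the cache simulators do not model. */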
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_arm64)
   /* ARM64 always has Neon, AFAICS. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  elif defined(VGA_mips64)
   return 8;

#  else
#    error "Unknown arch"
#  endif
}
2068 // Given a pointer to a function as obtained by "& functionname" in C,
2069 // produce a pointer to the actual entry point for the function.
2070 void* VG_(fnptr_to_fnentry
)( void* f
)
2072 # if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \
2073 || defined(VGP_arm_linux) || defined(VGO_darwin) \
2074 || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
2075 || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
2076 || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
2077 || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris)
2079 # elif defined(VGP_ppc64be_linux)
2080 /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
2081 3-word function descriptor, of which the first word is the entry
2083 UWord
* descr
= (UWord
*)f
;
2084 return (void*)(descr
[0]);
2086 # error "Unknown platform"
/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/