/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/
/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2017 Julian Seward

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/
#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcproc.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"
#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

#define STACK_PTR_S1(regs) ((regs).vex_shadow1.VG_STACK_PTR)
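/* VG_INSTR_PTR, VG_STACK_PTR and VG_FRAME_PTR are per-arch aliases for the
   corresponding guest register fields (on x86, for example, VG_INSTR_PTR
   expands to guest_EIP); see pub_core_machine.h for the mapping. */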
Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

Addr VG_(get_SP_s1) ( ThreadId tid ) {
   return STACK_PTR_S1( VG_(threads)[tid].arch );
}
void VG_(set_SP_s1) ( ThreadId tid, Addr sp ) {
   STACK_PTR_S1( VG_(threads)[tid].arch ) = sp;
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}
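/* These accessors operate on Valgrind's copy of the guest state in
   VG_(threads)[tid].arch, not on real CPU registers.  A minimal usage
   sketch (assuming tid names a valid, stopped thread):

      Addr ip = VG_(get_IP)(tid);   // read the guest program counter
      VG_(set_IP)(tid, ip + 4);     // e.g. step over a 4-byte insn
*/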
void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_FP;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
   /* ANDREAS 3 Apr 2019 FIXME r_f0..r_f7: is this correct? */
   regs->misc.S390X.r_f0
      = VG_(threads)[tid].arch.vex.guest_v0.w64[0];
   regs->misc.S390X.r_f1
      = VG_(threads)[tid].arch.vex.guest_v1.w64[0];
   regs->misc.S390X.r_f2
      = VG_(threads)[tid].arch.vex.guest_v2.w64[0];
   regs->misc.S390X.r_f3
      = VG_(threads)[tid].arch.vex.guest_v3.w64[0];
   regs->misc.S390X.r_f4
      = VG_(threads)[tid].arch.vex.guest_v4.w64[0];
   regs->misc.S390X.r_f5
      = VG_(threads)[tid].arch.vex.guest_v5.w64[0];
   regs->misc.S390X.r_f6
      = VG_(threads)[tid].arch.vex.guest_v6.w64[0];
   regs->misc.S390X.r_f7
      = VG_(threads)[tid].arch.vex.guest_v7.w64[0];
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}
void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}
void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}
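/* Illustrative sketch (not part of the original interface docs): a tool
   reading and writing the shadow1 copy of a 4-byte guest register.  The
   offset macro comes from VEX's generated libvex_guest_offsets.h; any
   other names here are hypothetical.

      UChar buf[4];
      VG_(get_shadow_regs_area)( tid, buf, 1/*shadowNo*/,
                                 OFFSET_x86_EAX, sizeof(buf) );
      // ... inspect or adjust buf ...
      VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,
                                 OFFSET_x86_EAX, sizeof(buf), buf );
*/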
static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, UWord))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32) || defined(VGA_mips64)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#elif defined(VGA_arm64)
   (*f)(tid, "x0" , vex->guest_X0 );
   (*f)(tid, "x1" , vex->guest_X1 );
   (*f)(tid, "x2" , vex->guest_X2 );
   (*f)(tid, "x3" , vex->guest_X3 );
   (*f)(tid, "x4" , vex->guest_X4 );
   (*f)(tid, "x5" , vex->guest_X5 );
   (*f)(tid, "x6" , vex->guest_X6 );
   (*f)(tid, "x7" , vex->guest_X7 );
   (*f)(tid, "x8" , vex->guest_X8 );
   (*f)(tid, "x9" , vex->guest_X9 );
   (*f)(tid, "x10", vex->guest_X10);
   (*f)(tid, "x11", vex->guest_X11);
   (*f)(tid, "x12", vex->guest_X12);
   (*f)(tid, "x13", vex->guest_X13);
   (*f)(tid, "x14", vex->guest_X14);
   (*f)(tid, "x15", vex->guest_X15);
   (*f)(tid, "x16", vex->guest_X16);
   (*f)(tid, "x17", vex->guest_X17);
   (*f)(tid, "x18", vex->guest_X18);
   (*f)(tid, "x19", vex->guest_X19);
   (*f)(tid, "x20", vex->guest_X20);
   (*f)(tid, "x21", vex->guest_X21);
   (*f)(tid, "x22", vex->guest_X22);
   (*f)(tid, "x23", vex->guest_X23);
   (*f)(tid, "x24", vex->guest_X24);
   (*f)(tid, "x25", vex->guest_X25);
   (*f)(tid, "x26", vex->guest_X26);
   (*f)(tid, "x27", vex->guest_X27);
   (*f)(tid, "x28", vex->guest_X28);
   (*f)(tid, "x29", vex->guest_X29);
   (*f)(tid, "x30", vex->guest_X30);
#else
#  error Unknown arch
#endif
}
void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread or thread instructed to die by another thread that
         // called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}
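/* Sketch of a callback a tool might supply (hypothetical, for
   illustration): print every general-purpose register of every thread.

      static void print_gp(ThreadId tid, const HChar* name, UWord val) {
         VG_(printf)("tid %u: %s = 0x%lx\n", tid, name, val);
      }
      ...
      VG_(apply_to_GP_regs)(print_gp);
*/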
void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}
Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_byte;
         return True;
      }
   }
   return False;
}
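/* Typical iteration idiom over all live client stacks (a sketch; assumes
   the caller runs at a point where the thread table is stable):

      ThreadId tid;
      Addr stack_min, stack_max;
      VG_(thread_stack_reset_iter)(&tid);
      while ( VG_(thread_stack_next)(&tid, &stack_min, &stack_max) ) {
         // [stack_min, stack_max] spans tid's current client stack
      }
*/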
Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_byte;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}
//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/
/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif
/* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
   testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32) || defined(VGA_mips64)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
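/* The probe pattern used by all the hwcaps detection code below: arm a
   SIGILL/SIGFPE handler, take a setjmp checkpoint, then execute the
   candidate instruction.  If the CPU lacks the instruction the kernel
   delivers a signal and handler_unsup_insn longjmps back, leaving the
   feature flag False.  Schematically (have_FOO is a placeholder name):

      have_FOO = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_FOO = False;            // got SIGILL: insn not supported
      } else {
         __asm__ __volatile__(".long 0x........");  // emit a FOO insn
      }
*/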
/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, that they will zero an aligned,
 * contiguous block of memory of a sensible size. */
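/* Worked example of the detection below: the test block is filled with
   0xff bytes, a single dcbz is executed on an aligned pointer inside it,
   and the zeroed bytes are then counted.  If the CPU's dcbz clears a
   32-byte cache block, exactly 32 bytes read back as zero and dcbz_szB
   ends up as 32. */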
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */
#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      const HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
      { "2828", VEX_S390X_MODEL_ZBC12 },
      { "2964", VEX_S390X_MODEL_Z13 },
      { "2965", VEX_S390X_MODEL_Z13S },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
      if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

      m = VG_(strstr)( p, "machine" );
      if (m == NULL) continue;

      p = m + sizeof "machine" - 1;
      while ( VG_(isspace)( *p ) || *p == '=') {
         if (*p == '\n') goto next_line;
         ++p;
      }

      model_name = p;
      for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
         struct model_map *mm = model_map + n;
         SizeT len = VG_(strlen)( mm->name );
         if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
              VG_(isspace)( model_name[len] )) {
            if (mm->id < model) model = mm->id;
            p = model_name + len;
            break;
         }
      }
      /* Skip until end-of-line */
      while (*p != '\n')
         ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* defined(VGA_s390x) */
#if defined(VGA_mips32) || defined(VGA_mips64)

/*
 * Initialize hwcaps by parsing /proc/cpuinfo.  Returns False if it cannot
 * determine what CPU it is (it searches only for the models that are or may
 * be supported by Valgrind).
 */
static Bool VG_(parse_cpuinfo)(void)
{
   const char *search_Broadcom_str = "cpu model\t\t: Broadcom";
   const char *search_Cavium_str   = "cpu model\t\t: Cavium";
   const char *search_Ingenic_str  = "cpu model\t\t: Ingenic";
   const char *search_Loongson_str = "cpu model\t\t: ICT Loongson";
   const char *search_MIPS_str     = "cpu model\t\t: MIPS";
   const char *search_Netlogic_str = "cpu model\t\t: Netlogic";

   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf, *isa;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return False;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr)(file_buf, search_Broadcom_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_BROADCOM;
   else if (VG_(strstr)(file_buf, search_Netlogic_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_NETLOGIC;
   else if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_CAVIUM;
   else if (VG_(strstr)(file_buf, search_MIPS_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_MIPS;
   else if (VG_(strstr)(file_buf, search_Ingenic_str) != NULL)
      vai.hwcaps = VEX_PRID_COMP_INGENIC_E1;
   else if (VG_(strstr)(file_buf, search_Loongson_str) != NULL)
      vai.hwcaps = (VEX_PRID_COMP_LEGACY | VEX_PRID_IMP_LOONGSON_64);
   else {
      /* Did not find string in the proc file. */
      vai.hwcaps = 0;
      VG_(free)(file_buf);
      return False;
   }

   isa = VG_(strstr)(file_buf, "isa\t\t\t: ");

   if (NULL != isa) {
      if (VG_(strstr) (isa, "mips32r1") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
      if (VG_(strstr) (isa, "mips32r2") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
      if (VG_(strstr) (isa, "mips32r6") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R6;
      if (VG_(strstr) (isa, "mips64r1") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R1;
      if (VG_(strstr) (isa, "mips64r2") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2;
      if (VG_(strstr) (isa, "mips64r6") != NULL)
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R6;

      /*
       * TODO(petarj): Remove this Cavium workaround once Linux kernel folks
       * decide to change incorrect settings in
       * mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h.
       * The current settings show mips32r1, mips32r2 and mips64r1 as
       * unsupported ISAs by Cavium MIPS CPUs.
       */
      if (VEX_MIPS_COMP_ID(vai.hwcaps) == VEX_PRID_COMP_CAVIUM) {
         vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1 | VEX_MIPS_CPU_ISA_M32R2 |
                       VEX_MIPS_CPU_ISA_M64R1;
      }
   } else {
      /*
       * Kernel does not provide information about supported ISAs.
       * Populate the isa level flags based on the CPU model. That is our
       * best guess.
       */
      switch VEX_MIPS_COMP_ID(vai.hwcaps) {
         case VEX_PRID_COMP_CAVIUM:
         case VEX_PRID_COMP_NETLOGIC:
            vai.hwcaps |= (VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1);
            /* fallthrough */
         case VEX_PRID_COMP_INGENIC_E1:
         case VEX_PRID_COMP_MIPS:
            vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R2;
            /* fallthrough */
         case VEX_PRID_COMP_BROADCOM:
            vai.hwcaps |= VEX_MIPS_CPU_ISA_M32R1;
            break;
         case VEX_PRID_COMP_LEGACY:
            if ((VEX_MIPS_PROC_ID(vai.hwcaps) == VEX_PRID_IMP_LOONGSON_64))
               vai.hwcaps |= VEX_MIPS_CPU_ISA_M64R2 | VEX_MIPS_CPU_ISA_M64R1 |
                             VEX_MIPS_CPU_ISA_M32R2 | VEX_MIPS_CPU_ISA_M32R1;
            break;
         default:
            break;
      }
   }

   VG_(free)(file_buf);
   return True;
}

#endif /* defined(VGA_mips32) || defined(VGA_mips64) */
#if defined(VGP_arm64_linux)

/* Check to see whether we are running on a Cavium core, and if so auto-enable
   the fallback LLSC implementation.  See #369459. */

static Bool VG_(parse_cpuinfo)(void)
{
   const char *search_Cavium_str = "CPU implementer\t: 0x43";

   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return False;

   fh = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
      vai.arm64_requires_fallback_LLSC = True;

   VG_(free)(file_buf);
   return True;
}

#endif /* defined(VGP_arm64_linux) */
Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
     have_sse3 = (ecx & (1<<0))  != 0; /* True => have sse3 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do MMXEXT. */
     have_mmxext = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        /* Some older AMD processors support a sse1 subset (Integer SSE). */
        have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
     }

     /* Figure out if this is an AMD or Intel that can do LZCNT. */
     have_lzcnt = False;
     if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
          || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Intel processors don't define the mmxext extension, but since it
        is just a sse1 subset always define it when we have sse1. */
     if (have_sse1)
        have_mmxext = True;

     va = VexArchX86;
     vai.endness = VexEndnessLE;

     if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
        VG_(machine_x86_have_mxcsr) = 0;
     } else {
        vai.hwcaps = 0; /*baseline - no sse at all*/
        VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }
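   /* Note: the if/else ladder above only advertises an SSE level when all
      lower levels are also present, so the reported x86 capability set is
      always one of a small set of cumulative baselines (none, MMXEXT,
      +SSE1, +SSE2, +SSE3), optionally with LZCNT on top. */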
#elif defined(VGA_amd64)
   { Bool have_sse3, have_ssse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx, have_bmi, have_avx2;
     Bool have_rdtscp, have_rdrand, have_f16c;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     ULong xgetbv_0 = 0;
     HChar vstr[13];
     vstr[0] = 0;

     have_sse3 = have_ssse3 = have_cx8 = have_cx16
        = have_lzcnt = have_avx = have_bmi = have_avx2
        = have_rdtscp = have_rdrand = have_f16c = False;

     eax = ebx = ecx = edx = max_basic = max_extended = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     max_basic = eax;
     if (max_basic < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3  = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     have_ssse3 = (ecx & (1<<9)) != 0;  /* True => have Sup SSE3 insns */
     // fma     is ecx:12
     // sse41   is ecx:19
     // sse42   is ecx:20
     // xsave   is ecx:26
     // osxsave is ecx:27
     // avx     is ecx:28
     have_f16c   = (ecx & (1<<29)) != 0; /* True => have F16C insns */
     have_rdrand = (ecx & (1<<30)) != 0; /* True => have RDRAND insns */

     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
        /* Processor supports AVX instructions and XGETBV is enabled
           by OS and AVX instructions are enabled by the OS. */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx","rax");
        xgetbv_0 = w;
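        /* In XCR0, bit 0 enables x87 state, bit 1 SSE/XMM state and
           bit 2 AVX/YMM state; requiring (xgetbv_0 & 7) == 7 below thus
           means the OS must have enabled all three state components. */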
        if ((xgetbv_0 & 7) == 7) {
           /* Only say we have AVX if the XSAVE-allowable
              bitfield-mask allows x87, SSE and AVX state.  We could
              actually run with a more restrictive XGETBV(0) value,
              but VEX's implementation of XSAVE and XRSTOR assumes
              that all 3 bits are enabled.

              Also, the VEX implementation of XSAVE/XRSTOR assumes that
              state component [2] (the YMM high halves) are located in
              the XSAVE image at offsets 576 .. 831.  So we have to
              check that here before declaring AVX to be supported. */
           UInt eax2, ebx2, ecx2, edx2;
           VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2);
           if (ebx2 == 576 && eax2 == 256) {
              have_avx = True;
           }
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this CPU can do LZCNT. */
     have_lzcnt = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Can we do RDTSCP? */
     have_rdtscp = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
     }

     /* Check for BMI1 and AVX2, but only if we have AVX1 (plus OS support). */
     have_bmi  = False;
     have_avx2 = False;
     if (have_avx && max_basic >= 7) {
        VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
        have_bmi  = (ebx & (1<<3)) != 0; /* True => have BMI1 */
        have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
     }

     /* Sanity check for RDRAND and F16C.  These don't actually *need* AVX, but
        it's convenient to restrict them to the AVX case since the simulated
        CPUID we'll offer them on has AVX as a base. */
     if (!have_avx) {
        have_f16c   = False;
        have_rdrand = False;
     }

     va          = VexArchAMD64;
     vai.endness = VexEndnessLE;
     vai.hwcaps  = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
                 | (have_ssse3  ? VEX_HWCAPS_AMD64_SSSE3  : 0)
                 | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
                 | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
                 | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
                 | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
                 | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
                 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0)
                 | (have_f16c   ? VEX_HWCAPS_AMD64_F16C   : 0)
                 | (have_rdrand ? VEX_HWCAPS_AMD64_RDRAND : 0);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }
#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07, have_isa_3_0;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* Check for ISA 3.0 support. */
     have_isa_3_0 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_3_0 = False;
     } else {
        __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
                   (Int)have_F, (Int)have_V, (Int)have_FX,
                   (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                   (Int)have_isa_2_07, (Int)have_isa_3_0);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;
     vai.endness = VexEndnessBE;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;
     if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA3_0;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }
#elif defined(VGA_ppc64be)|| defined(VGA_ppc64le)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07, have_isa_3_0;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* Check for ISA 3.0 support. */
     have_isa_3_0 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_3_0 = False;
     } else {
        __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n",
                   (Int)have_F, (Int)have_V, (Int)have_FX,
                   (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                   (Int)have_isa_2_07, (Int)have_isa_3_0);
     /* on ppc64be, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;
#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;
     if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_0;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }
#elif defined(VGA_s390x)

#  include "libvex_s390x_common.h"

   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_STFLE;
     Int i, r, model;

     /* If the model is "unknown" don't treat this as an error. Assume
        this is a brand-new machine model for which we don't have the
        identification yet. Keeping fingers crossed. */
     model = VG_(get_machine_model)();

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
       /* BASR loads the address of the next insn into r1. Needed to avoid
          a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     /* Check availability of STFLE. If available store facility bits
        in hoststfle. */
     ULong hoststfle[S390_NUM_FACILITY_DW];

     for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
        hoststfle[i] = 0;

     have_STFLE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
        register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;

        __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                             : "=m" (hoststfle), "+d"(reg0)
                             : : "cc", "memory");
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;
     vai.endness = VexEndnessBE;

     vai.hwcaps = model;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }

     /* Detect presence of certain facilities using the STFLE insn.
        Note that these facilities were introduced at the same time as, or
        later than, STFLE, so the absence of STFLE implies the absence of
        the facility we're trying to detect. */
     struct fac_hwcaps_map {
        UInt installed;
        UInt facility_bit;
        UInt hwcaps_bit;
        const HChar name[6];   // may need adjustment for new facility names
     } fac_hwcaps[] = {
        { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM" },
        { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE" },
        { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP" },
        { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX" },
        { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2" },
        { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3" },
        { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
        { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
        { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC" },
        { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO" },
        { False, S390_FAC_VX,    VEX_HWCAPS_S390X_VX,    "VX" },
        { False, S390_FAC_MSA5,  VEX_HWCAPS_S390X_MSA5,  "MSA5" }
     };

     /* Set hwcaps according to the detected facilities */
     UChar dw_number = 0;
     UChar fac_bit = 0;
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        vg_assert(fac_hwcaps[i].facility_bit <= 191);  // for now
        dw_number = fac_hwcaps[i].facility_bit / 64;
        fac_bit = fac_hwcaps[i].facility_bit % 64;
        if (hoststfle[dw_number] & (1ULL << (63 - fac_bit))) {
           fac_hwcaps[i].installed = True;
           vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
        }
     }

     /* Build up a string showing the probed-for facilities */
     HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
                   (sizeof fac_hwcaps[0].name + 3) + //  %s %d
                   7 + 1 + 4 + 2  // machine %4d
                   + 1];  // \0
     HChar *p = fac_str;
     p += VG_(sprintf)(p, "machine %4d  ", model);
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name,
                          fac_hwcaps[i].installed);
     }
     *p++ = '\0';

     VG_(debugLog)(1, "machine", "%s\n", fac_str);
     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }
#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON, have_V8;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension but they differ
        very little, so for now we will not distinguish them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     /* ARMv8 insns */
     have_V8 = True;
     if (archlevel == 7) {
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_V8 = False;
        } else {
           __asm__ __volatile__(".word 0xF3044F54"); /* VMAXNM.F32 q2,q2,q2 */
        }
        if (have_V8 && have_NEON && have_VFP3) {
           archlevel = 8;
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
                   archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
                   (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;
     vai.endness = VexEndnessLE;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }
#elif defined(VGA_arm64)
   {
     va = VexArchARM64;
     vai.endness = VexEndnessLE;

     /* So far there are no variants. */
     vai.hwcaps = 0;

     VG_(machine_get_cache_info)(&vai);

     /* Check whether we need to use the fallback LLSC implementation.
        If the check fails, give up. */
     if (! VG_(parse_cpuinfo)())
        return False;

     /* 0 denotes 'not set'.  The range of legitimate values here,
        after being set that is, is 2 through 17 inclusive. */
     vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
     vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
     ULong ctr_el0;
     __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
     vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
     vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
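     /* Worked example: CTR_EL0.DminLine (bits 19:16) holds log2 of the
        line size in 4-byte words, so a field value of 4 means
        2^(4+2) = 64 bytes; arm64_dMinLine_lg2_szB would then be 6 and
        the debug log below would print 64. */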
     VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
                      "ctr_el0.iMinLine_szB = %d\n",
                   1 << vai.arm64_dMinLine_lg2_szB,
                   1 << vai.arm64_iMinLine_lg2_szB);
     VG_(debugLog)(1, "machine", "ARM64: requires_fallback_LLSC: %s\n",
                   vai.arm64_requires_fallback_LLSC ? "yes" : "no");

     return True;
   }
1730 #elif defined(VGA_mips32)
1732 /* Define the position of F64 bit in FIR register. */
1735 if (!VG_(parse_cpuinfo
)())
1738 # if defined(VKI_LITTLE_ENDIAN)
1739 vai
.endness
= VexEndnessLE
;
1740 # elif defined(VKI_BIG_ENDIAN)
1741 vai
.endness
= VexEndnessBE
;
1743 vai
.endness
= VexEndness_INVALID
;
1746 /* Same instruction set detection algorithm as for ppc32/arm... */
1747 vki_sigset_t saved_set
, tmp_set
;
1748 vki_sigaction_fromK_t saved_sigill_act
;
1749 vki_sigaction_toK_t tmp_sigill_act
;
1751 volatile Bool have_DSP
, have_DSPr2
, have_MSA
;
1754 vg_assert(sizeof(vki_sigaction_fromK_t
) == sizeof(vki_sigaction_toK_t
));
1756 VG_(sigemptyset
)(&tmp_set
);
1757 VG_(sigaddset
)(&tmp_set
, VKI_SIGILL
);
1759 r
= VG_(sigprocmask
)(VKI_SIG_UNBLOCK
, &tmp_set
, &saved_set
);
1762 r
= VG_(sigaction
)(VKI_SIGILL
, NULL
, &saved_sigill_act
);
1764 tmp_sigill_act
= saved_sigill_act
;
1766 /* NODEFER: signal handler does not return (from the kernel's point of
1767 view), hence if it is to successfully catch a signal more than once,
1768 we need the NODEFER flag. */
1769 tmp_sigill_act
.sa_flags
&= ~VKI_SA_RESETHAND
;
1770 tmp_sigill_act
.sa_flags
&= ~VKI_SA_SIGINFO
;
1771 tmp_sigill_act
.sa_flags
|= VKI_SA_NODEFER
;
1772 tmp_sigill_act
.ksa_handler
= handler_unsup_insn
;
1773 VG_(sigaction
)(VKI_SIGILL
, &tmp_sigill_act
, NULL
);
     if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {

        /* MSA instructions. */
        have_MSA = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_MSA = False;
        } else {
           __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
        }
        if (have_MSA) {
           vai.hwcaps |= VEX_PRID_IMP_P5600;
        } else {
           /* DSPr2 instructions. */
           have_DSPr2 = True;
           if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
              have_DSPr2 = False;
           } else {
              __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
           }
           if (have_DSPr2) {
              /* We assume it's 74K, since it can run DSPr2. */
              vai.hwcaps |= VEX_PRID_IMP_74K;
           } else {
              /* DSP instructions. */
              have_DSP = True;
              if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
                 have_DSP = False;
              } else {
                 __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
              }
              if (have_DSP) {
                 /* We assume it's 34K, since it has support for DSP. */
                 vai.hwcaps |= VEX_PRID_IMP_34K;
              }
           }
        }
     }

#    if defined(VGP_mips32_linux)
     Int fpmode = VG_(prctl)(VKI_PR_GET_FP_MODE, 0, 0, 0, 0);
#    else
     Int fpmode = -1;
#    endif

     if (fpmode < 0) {
        /* prctl(PR_GET_FP_MODE) is not supported by the kernel, so
           use an alternative way to determine the FP mode. */
        ULong result = 0;

        if (!VG_MINIMAL_SETJMP(env_unsup_insn)) {
           __asm__ __volatile__(
              ".set push\n\t"
              ".set noreorder\n\t"
              ".set oddspreg\n\t"
              ".set hardfloat\n\t"
              "lui $t0, 0x3FF0\n\t"
              "ldc1 $f0, %0\n\t"
              "mtc1 $t0, $f1\n\t"
              "sdc1 $f0, %0\n\t"
              ".set pop\n\t"
              : "+m"(result)
              :
              : "t0", "$f0", "$f1", "memory");

           fpmode = (result != 0x3FF0000000000000ull);
        }
     }

     if (fpmode != 0)
        vai.hwcaps |= VEX_MIPS_HOST_FR;
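     /* How the fallback works (comment added for clarity; the
        rationale is an inference from the instruction sequence): the
        probe puts the upper word pattern of 1.0 (0x3FF0) in $t0, loads
        the 64-bit 'result' (initially 0) into $f0, then writes $t0
        into $f1.  In FR=0 mode $f0/$f1 form one 64-bit pair, so the
        final sdc1 stores 0x3FF0000000000000; in FR=1 mode $f1 is an
        independent 64-bit register and 'result' stays 0.  Hence
        fpmode = (result != 0x3FF0000000000000ull) evaluates to 0 for
        FR=0 and 1 for FR=1. */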
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
     VG_(machine_get_cache_info)(&vai);

     return True;
   }
#elif defined(VGA_mips64)
   {
     va = VexArchMIPS64;
     if (!VG_(parse_cpuinfo)())
        return False;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps |= VEX_MIPS_HOST_FR;

     /* Same instruction set detection algorithm as for ppc32/arm... */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t   tmp_sigill_act;

     volatile Bool have_MSA;
     Int r;

     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     if (VEX_PRID_COMP_MIPS == VEX_MIPS_COMP_ID(vai.hwcaps)) {

        /* MSA instructions */
        have_MSA = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_MSA = False;
        } else {
           __asm__ __volatile__(".word 0x7800088E"); /* addv.b w2, w1, w0 */
        }
        if (have_MSA) {
           vai.hwcaps |= VEX_PRID_IMP_P5600;
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif
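
/* Illustrative usage sketch (added; the call site shown is
   hypothetical): startup code that has discovered the line size,
   e.g. from the AT_ICACHEBSIZE auxv entry, would notify it like so:

      VG_(machine_ppc32_set_clszB)( 128 );
      VG_(machine_ppc32_set_clszB)( 128 );  // same value again is tolerated

   Calling later with a different nonzero value would trip the first
   assertion above. */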

/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif

/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif

/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}
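
/* Example (added for clarity): a client that needs the host's
   capabilities would typically do

      VexArch     arch;
      VexArchInfo archinfo;
      VG_(machine_get_VexArchInfo)( &arch, &archinfo );

   after VG_(machine_get_hwcaps) has run; calling earlier trips the
   hwcaps_done assertion.  Either argument may be NULL if the caller
   only wants the other. */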

/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   what the minimum cache line size they are prepared to simulate is.
   Basically require that the minimum cache line size is at least as
   large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines. */
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V)   return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX)  return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V)   return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX)  return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_arm64)
   /* ARM64 always has Neon, AFAICS. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  elif defined(VGA_mips64)
   return 8;

#  else
#    error "Unknown arch"
#  endif
}
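
/* Worked example (added for clarity): with 32-byte cache lines and a
   32-byte (AVX) register, a transfer touches at most two lines: even
   the worst case, a store starting at line offset 1, covers offsets
   1..32, which lie in exactly two 32-byte lines.  With 16-byte lines
   the same store could touch three lines, which is why the minimum
   simulated line size must be at least the value returned above. */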

// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)        \
      || defined(VGP_arm_linux) || defined(VGO_darwin)          \
      || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux)  \
      || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)  \
      || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris)
   return f;
#  elif defined(VGP_ppc64be_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}
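
/* Illustrative sketch (added; 'OpdEntry' is a hypothetical name): the
   AIX-style descriptor that 'f' points at on ppc64be-linux looks like

      typedef struct {
         UWord entry;   // address of the first instruction
         UWord toc;     // TOC (r2) value the function expects
         UWord env;     // environment pointer, unused from C
      } OpdEntry;

   so VG_(fnptr_to_fnentry) simply returns descr[0], the 'entry'
   field.  On all other platforms listed above, a C function pointer
   already is the entry address, hence the plain 'return f'. */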
/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/