/*---------------------------------------------------------------*/
/*--- begin                             guest_amd64_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
#include "libvex_basictypes.h"
#include "libvex_emnote.h"
#include "libvex_guest_amd64.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_amd64_defs.h"
#include "guest_generic_x87.h"
/* This file contains helper functions for amd64 guest code.
   Calls to these functions are generated by the back end.
   These calls are of course in the host machine code and
   this file will be compiled to host machine code, so that
   all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change the signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   the amd64 front end.

   The convention used is that all functions called from generated
   code are named amd64g_<something>, and any function whose name lacks
   that prefix is not called from generated code.  Note that some
   LibVEX_* functions can however be called by VEX's client, but that
   is not the same as calling them from VEX-generated code.
*/
/* Set to 1 to get detailed profiling info about use of the flag
   machinery. */
#define PROFILE_RFLAGS 0


/*---------------------------------------------------------------*/
/*--- %rflags run-time helpers.                               ---*/
/*---------------------------------------------------------------*/
/* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
   after imulq/mulq. */

static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
{
   const Long halfMask = 0xFFFFFFFFLL;
   ULong u0, v0, w0;
   Long  u1, v1, w1, w2, t;
   u0   = u & halfMask;
   u1   = u >> 32;
   v0   = v & halfMask;
   v1   = v >> 32;
   w0   = u0 * v0;
   t    = u1 * v0 + (w0 >> 32);
   w1   = t & halfMask;
   w2   = t >> 32;
   w1   = u0 * v1 + w1;
   *rHi = u1 * v1 + w2 + (w1 >> 32);
   *rLo = (Long)((ULong)u * (ULong)v);
}

static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
{
   const ULong halfMask = 0xFFFFFFFFULL;
   ULong u0, v0, w0;
   ULong u1, v1, w1, w2, t;
   u0   = u & halfMask;
   u1   = u >> 32;
   v0   = v & halfMask;
   v1   = v >> 32;
   w0   = u0 * v0;
   t    = u1 * v0 + (w0 >> 32);
   w1   = t & halfMask;
   w2   = t >> 32;
   w1   = u0 * v1 + w1;
   *rHi = u1 * v1 + w2 + (w1 >> 32);
   *rLo = u * v;
}
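
/* The two multiply helpers use the usual schoolbook decomposition
   u = u1*2^32 + u0, v = v1*2^32 + v0: the low half of the product is just
   u*v computed in 64 bits, and the high half is u1*v1 plus the carries out
   of the two cross terms.  For instance, mullU64(1ULL<<32, 1ULL<<32,
   &hi, &lo) yields hi == 1 and lo == 0. */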
static const UChar parity_table[256] = {
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
    0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
};
/* generalised left-shifter */
static inline Long lshift ( Long x, Int n )
{
   if (n >= 0)
      return (ULong)x << n;
   else
      return x >> (-n);
}

/* identity on ULong */
static inline ULong idULong ( ULong x )
{
   return x;
}
#define PREAMBLE(__data_bits)                                   \
   /* const */ ULong DATA_MASK                                  \
      = __data_bits==8                                          \
           ? 0xFFULL                                            \
           : (__data_bits==16                                   \
                ? 0xFFFFULL                                     \
                : (__data_bits==32                              \
                     ? 0xFFFFFFFFULL                            \
                     : 0xFFFFFFFFFFFFFFFFULL));                 \
   /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1);     \
   /* const */ ULong CC_DEP1 = cc_dep1_formal;                  \
   /* const */ ULong CC_DEP2 = cc_dep2_formal;                  \
   /* const */ ULong CC_NDEP = cc_ndep_formal;                  \
   /* Four bogus assignments, which hopefully gcc can */        \
   /* optimise away, and which stop it complaining about */     \
   /* unused variables. */                                      \
   SIGN_MASK = SIGN_MASK;                                       \
   DATA_MASK = DATA_MASK;                                       \
   CC_DEP2 = CC_DEP2;                                           \
   CC_NDEP = CC_NDEP;
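
/* The ACTIONS_* macros below all read the flag thunk through the names
   bound by PREAMBLE: CC_DEP1 and CC_DEP2 are the two data dependencies of
   the most recent flag-setting operation, and CC_NDEP carries any extra
   value the flag computation needs but which should not be treated as a
   data dependency (for example, the old carry for the ADC/SBB cases).
   So after a 32-bit add the thunk holds cc_op = AMD64G_CC_OP_ADDL,
   cc_dep1 = argL, cc_dep2 = argR, and ACTIONS_ADD recomputes O S Z A C P
   from exactly those values. */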
/*-------------------------------------------------------------*/

#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL + argR;                                        \
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                   \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
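
/* The overflow term above is the standard identity: a signed add overflows
   exactly when the operands have equal sign bits and the result's sign
   differs, i.e. msb((argL ^ argR ^ -1) & (argL ^ res)).  lshift(.., 12 -
   DATA_BITS) then moves that msb into bit 11, where AMD64G_CC_MASK_O
   lives.  E.g. with DATA_BITS == 8, 0x7F + 0x01 gives res == 0x80, so the
   expression has bit 7 set and of comes out as 0x800. */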
/*-------------------------------------------------------------*/

#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL - argR;                                        \
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;                  \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, oldC, res;                               \
     oldC = CC_NDEP & AMD64G_CC_MASK_C;                         \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL + argR) + oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;               \
     else                                                       \
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
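
/* Note the "argR = CC_DEP2 ^ oldC" step above: for ADC the thunk holds the
   right-hand operand xor'd with the old carry, so the macro first undoes
   that xor to recover the real argR, and the carry-out test becomes "<="
   rather than "<" when the old carry was set, to account for the extra +1.
   ACTIONS_SBB below applies the same convention to subtract-with-borrow. */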
/*-------------------------------------------------------------*/

#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, oldC, res;                               \
     oldC = CC_NDEP & AMD64G_CC_MASK_C;                         \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL - argR) - oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;              \
     else                                                       \
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;               \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & AMD64G_CC_MASK_O;            \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = 0;                                                    \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     res  = CC_DEP1;                                            \
     argL = res - 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & AMD64G_CC_MASK_C;                           \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;               \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong argL, argR, res;                                     \
     res  = CC_DEP1;                                            \
     argL = res + 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & AMD64G_CC_MASK_C;                           \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK)                                    \
           == ((ULong)SIGN_MASK - 1)) << 11;                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C;      \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & AMD64G_CC_MASK_O;                                   \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = CC_DEP2 & 1;                                          \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & AMD64G_CC_MASK_O;                                   \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong fl                                                   \
        = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))    \
          | (AMD64G_CC_MASK_C & CC_DEP1)                        \
          | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,                \
                                        11-(DATA_BITS-1))       \
                     ^ lshift(CC_DEP1, 11)));                   \
     return fl;                                                 \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong fl                                                   \
        = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C))    \
          | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))     \
          | (AMD64G_CC_MASK_O & (lshift(CC_DEP1,                \
                                        11-(DATA_BITS-1))       \
                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));   \
     return fl;                                                 \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_UMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { ULong cf, pf, af, zf, sf, of;                              \
     ULong lo, hi;                                              \
     mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo );       \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_SMULQ                                           \
{                                                               \
   PREAMBLE(64);                                                \
   { ULong cf, pf, af, zf, sf, of;                              \
     Long lo, hi;                                               \
     mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo );         \
     cf = (hi != (lo >>/*s*/ (64-1)));                          \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - 64) & 0x80;                            \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_ANDN(DATA_BITS,DATA_UTYPE)                      \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = 0;                                                    \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_BLSI(DATA_BITS,DATA_UTYPE)                      \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = ((DATA_UTYPE)CC_DEP2 != 0);                           \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_BLSMSK(DATA_BITS,DATA_UTYPE)                    \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { Long cf, pf, af, zf, sf, of;                               \
     cf = ((DATA_UTYPE)CC_DEP2 == 0);                           \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = 0;                                                    \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_BLSR(DATA_BITS,DATA_UTYPE)                      \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong cf, pf, af, zf, sf, of;                              \
     cf = ((DATA_UTYPE)CC_DEP2 == 0);                           \
     pf = 0;                                                    \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_ADX(DATA_BITS,DATA_UTYPE,FLAGNAME)              \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { ULong ocf; /* o or c */                                    \
     ULong argL, argR, oldOC, res;                              \
     oldOC = (CC_NDEP >> AMD64G_CC_SHIFT_##FLAGNAME) & 1;       \
     argL  = CC_DEP1;                                           \
     argR  = CC_DEP2 ^ oldOC;                                   \
     res   = (argL + argR) + oldOC;                             \
     if (oldOC)                                                 \
        ocf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;              \
     else                                                       \
        ocf = (DATA_UTYPE)res < (DATA_UTYPE)argL;               \
     return (CC_NDEP & ~AMD64G_CC_MASK_##FLAGNAME)              \
            | (ocf << AMD64G_CC_SHIFT_##FLAGNAME);              \
   }                                                            \
}
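
/* Unlike ADC, the ADCX/ADOX variants handled by ACTIONS_ADX update only a
   single flag -- C or O, selected by FLAGNAME -- and pass the remaining
   flag bits through from CC_NDEP unchanged, which is what the final
   "(CC_NDEP & ~MASK) | (ocf << SHIFT)" expression implements. */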
/*-------------------------------------------------------------*/


#if PROFILE_RFLAGS

static Bool initted = False;

/* C flag, fast route */
static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
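
/* With the 0x3FFFFF mask, SHOW_COUNTS_NOW fires once every 0x400000 (about
   4.2 million) entries into the flag machinery, which keeps the profiling
   output manageable on long runs. */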
static void showCounts ( void )
{
   Int op, co;
   HChar ch;
   vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c );

   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
              "    S   NS    P   NP    L   NL   LE  NLE\n");
   vex_printf("     -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {

      ch = ' ';
      if (op > 0 && (op-1) % 4 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 4 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 4 == 2)
         ch = 'L';
      if (op > 0 && (op-1) % 4 == 3)
         ch = 'Q';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else {
            vex_printf(" %3d ", n );
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}

static void initCounts ( void )
{
   Int op, co;
   initted = True;
   for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
      tabc_fast[op] = tabc_slow[op] = 0;
      for (co = 0; co < 16; co++)
         tab_cond[op][co] = 0;
   }
}

#endif /* PROFILE_RFLAGS */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters.
   Worker function, not directly called from generated code. */
static
ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
                                        ULong cc_dep1_formal,
                                        ULong cc_dep2_formal,
                                        ULong cc_ndep_formal )
{
   switch (cc_op) {
      case AMD64G_CC_OP_COPY:
         return cc_dep1_formal
                & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
                   | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);

      case AMD64G_CC_OP_ADDB:   ACTIONS_ADD(  8, UChar  );
      case AMD64G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case AMD64G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );
      case AMD64G_CC_OP_ADDQ:   ACTIONS_ADD( 64, ULong  );

      case AMD64G_CC_OP_ADCB:   ACTIONS_ADC(  8, UChar  );
      case AMD64G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case AMD64G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );
      case AMD64G_CC_OP_ADCQ:   ACTIONS_ADC( 64, ULong  );

      case AMD64G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case AMD64G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case AMD64G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );
      case AMD64G_CC_OP_SUBQ:   ACTIONS_SUB( 64, ULong  );

      case AMD64G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case AMD64G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case AMD64G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );
      case AMD64G_CC_OP_SBBQ:   ACTIONS_SBB( 64, ULong  );

      case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );
      case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong  );

      case AMD64G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case AMD64G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case AMD64G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );
      case AMD64G_CC_OP_INCQ:   ACTIONS_INC( 64, ULong  );

      case AMD64G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case AMD64G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case AMD64G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );
      case AMD64G_CC_OP_DECQ:   ACTIONS_DEC( 64, ULong  );

      case AMD64G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case AMD64G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case AMD64G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );
      case AMD64G_CC_OP_SHLQ:   ACTIONS_SHL( 64, ULong  );

      case AMD64G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case AMD64G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case AMD64G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );
      case AMD64G_CC_OP_SHRQ:   ACTIONS_SHR( 64, ULong  );

      case AMD64G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case AMD64G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case AMD64G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );
      case AMD64G_CC_OP_ROLQ:   ACTIONS_ROL( 64, ULong  );

      case AMD64G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case AMD64G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case AMD64G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );
      case AMD64G_CC_OP_RORQ:   ACTIONS_ROR( 64, ULong  );

      case AMD64G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                  UShort, toUShort );
      case AMD64G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                  UInt,   toUInt );
      case AMD64G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                  ULong,  idULong );

      case AMD64G_CC_OP_UMULQ:  ACTIONS_UMULQ;

      case AMD64G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                  Short,  toUShort );
      case AMD64G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                  Int,    toUInt );
      case AMD64G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                  Long,   idULong );

      case AMD64G_CC_OP_SMULQ:  ACTIONS_SMULQ;

      case AMD64G_CC_OP_ANDN32: ACTIONS_ANDN( 32, UInt  );
      case AMD64G_CC_OP_ANDN64: ACTIONS_ANDN( 64, ULong );

      case AMD64G_CC_OP_BLSI32: ACTIONS_BLSI( 32, UInt  );
      case AMD64G_CC_OP_BLSI64: ACTIONS_BLSI( 64, ULong );

      case AMD64G_CC_OP_BLSMSK32: ACTIONS_BLSMSK( 32, UInt  );
      case AMD64G_CC_OP_BLSMSK64: ACTIONS_BLSMSK( 64, ULong );

      case AMD64G_CC_OP_BLSR32: ACTIONS_BLSR( 32, UInt  );
      case AMD64G_CC_OP_BLSR64: ACTIONS_BLSR( 64, ULong );

      case AMD64G_CC_OP_ADCX32: ACTIONS_ADX( 32, UInt,  C );
      case AMD64G_CC_OP_ADCX64: ACTIONS_ADX( 64, ULong, C );

      case AMD64G_CC_OP_ADOX32: ACTIONS_ADX( 32, UInt,  O );
      case AMD64G_CC_OP_ADOX64: ACTIONS_ADX( 64, ULong, O );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
                    "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
   }
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters. */
ULong amd64g_calculate_rflags_all ( ULong cc_op,
                                    ULong cc_dep1,
                                    ULong cc_dep2,
                                    ULong cc_ndep )
{
#  if PROFILE_RFLAGS
   if (!initted) initCounts();
   n_calc_all++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif
   return
      amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters. */
ULong amd64g_calculate_rflags_c ( ULong cc_op,
                                  ULong cc_dep1,
                                  ULong cc_dep2,
                                  ULong cc_ndep )
{
#  if PROFILE_RFLAGS
   if (!initted) initCounts();
   n_calc_c++;
   tabc_fast[cc_op]++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   /* Fast-case some common ones. */
   switch (cc_op) {
      case AMD64G_CC_OP_COPY:
         return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
      case AMD64G_CC_OP_LOGICQ:
      case AMD64G_CC_OP_LOGICL:
      case AMD64G_CC_OP_LOGICW:
      case AMD64G_CC_OP_LOGICB:
         return 0;
      //      case AMD64G_CC_OP_SUBL:
      //         return ((UInt)cc_dep1) < ((UInt)cc_dep2)
      //                   ? AMD64G_CC_MASK_C : 0;
      //      case AMD64G_CC_OP_SUBW:
      //         return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
      //                   ? AMD64G_CC_MASK_C : 0;
      //      case AMD64G_CC_OP_SUBB:
      //         return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
      //                   ? AMD64G_CC_MASK_C : 0;
      //      case AMD64G_CC_OP_INCL:
      //      case AMD64G_CC_OP_DECL:
      //         return cc_ndep & AMD64G_CC_MASK_C;
      default:
         break;
   }

#  if PROFILE_RFLAGS
   tabc_fast[cc_op]--;
   tabc_slow[cc_op]++;
#  endif

   return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
          & AMD64G_CC_MASK_C;
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
                                   ULong cc_op,
                                   ULong cc_dep1,
                                   ULong cc_dep2,
                                   ULong cc_ndep )
{
   ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
                                                  cc_dep2, cc_ndep);
   ULong of,sf,zf,cf,pf;
   ULong inv = cond & 1;

#  if PROFILE_RFLAGS
   if (!initted) initCounts();
   tab_cond[cc_op][cond]++;
   n_calc_cond++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   switch (cond) {
      case AMD64CondNO:
      case AMD64CondO: /* OF == 1 */
         of = rflags >> AMD64G_CC_SHIFT_O;
         return 1 & (inv ^ of);

      case AMD64CondNZ:
      case AMD64CondZ: /* ZF == 1 */
         zf = rflags >> AMD64G_CC_SHIFT_Z;
         return 1 & (inv ^ zf);

      case AMD64CondNB:
      case AMD64CondB: /* CF == 1 */
         cf = rflags >> AMD64G_CC_SHIFT_C;
         return 1 & (inv ^ cf);

      case AMD64CondNBE:
      case AMD64CondBE: /* (CF or ZF) == 1 */
         cf = rflags >> AMD64G_CC_SHIFT_C;
         zf = rflags >> AMD64G_CC_SHIFT_Z;
         return 1 & (inv ^ (cf | zf));

      case AMD64CondNS:
      case AMD64CondS: /* SF == 1 */
         sf = rflags >> AMD64G_CC_SHIFT_S;
         return 1 & (inv ^ sf);

      case AMD64CondNP:
      case AMD64CondP: /* PF == 1 */
         pf = rflags >> AMD64G_CC_SHIFT_P;
         return 1 & (inv ^ pf);

      case AMD64CondNL:
      case AMD64CondL: /* (SF xor OF) == 1 */
         sf = rflags >> AMD64G_CC_SHIFT_S;
         of = rflags >> AMD64G_CC_SHIFT_O;
         return 1 & (inv ^ (sf ^ of));

      case AMD64CondNLE:
      case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */
         sf = rflags >> AMD64G_CC_SHIFT_S;
         of = rflags >> AMD64G_CC_SHIFT_O;
         zf = rflags >> AMD64G_CC_SHIFT_Z;
         return 1 & (inv ^ ((sf ^ of) | zf));

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("amd64g_calculate_condition"
                    "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
         vpanic("amd64g_calculate_condition");
   }
}
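
/* The AMD64Condcode encoding pairs each condition with its negation in
   adjacent even/odd values (O/NO, B/NB, Z/NZ, ...), so the switch above
   needs only one case body per pair: "inv = cond & 1" flips the sense for
   the odd (negated) member.  Evaluating CondNZ, for instance, is just
   1 & (1 ^ (rflags >> AMD64G_CC_SHIFT_Z)). */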
/* VISIBLE TO LIBVEX CLIENT */
ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/const VexGuestAMD64State* vex_state )
{
   ULong rflags = amd64g_calculate_rflags_all_WRK(
                     vex_state->guest_CC_OP,
                     vex_state->guest_CC_DEP1,
                     vex_state->guest_CC_DEP2,
                     vex_state->guest_CC_NDEP
                  );
   Long dflag = vex_state->guest_DFLAG;
   vassert(dflag == 1 || dflag == -1);
   if (dflag == -1)
      rflags |= (1<<10);
   if (vex_state->guest_IDFLAG == 1)
      rflags |= (1<<21);
   if (vex_state->guest_ACFLAG == 1)
      rflags |= (1<<18);

   return rflags;
}
/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestAMD64_put_rflags ( ULong rflags,
                               /*MOD*/VexGuestAMD64State* vex_state )
{
   /* D flag */
   if (rflags & AMD64G_CC_MASK_D) {
      vex_state->guest_DFLAG = -1;
      rflags &= ~AMD64G_CC_MASK_D;
   }
   else
      vex_state->guest_DFLAG = 1;

   /* ID flag */
   if (rflags & AMD64G_CC_MASK_ID) {
      vex_state->guest_IDFLAG = 1;
      rflags &= ~AMD64G_CC_MASK_ID;
   }
   else
      vex_state->guest_IDFLAG = 0;

   /* AC flag */
   if (rflags & AMD64G_CC_MASK_AC) {
      vex_state->guest_ACFLAG = 1;
      rflags &= ~AMD64G_CC_MASK_AC;
   }
   else
      vex_state->guest_ACFLAG = 0;

   UInt cc_mask = AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z |
                  AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P;
   vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = rflags & cc_mask;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}
976 LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag
,
977 /*MOD*/VexGuestAMD64State
* vex_state
)
979 ULong oszacp
= amd64g_calculate_rflags_all_WRK(
980 vex_state
->guest_CC_OP
,
981 vex_state
->guest_CC_DEP1
,
982 vex_state
->guest_CC_DEP2
,
983 vex_state
->guest_CC_NDEP
985 if (new_carry_flag
& 1) {
986 oszacp
|= AMD64G_CC_MASK_C
;
988 oszacp
&= ~AMD64G_CC_MASK_C
;
990 vex_state
->guest_CC_OP
= AMD64G_CC_OP_COPY
;
991 vex_state
->guest_CC_DEP1
= oszacp
;
992 vex_state
->guest_CC_DEP2
= 0;
993 vex_state
->guest_CC_NDEP
= 0;
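
/* A minimal sketch of how a LibVEX client might drive the three routines
   above (hypothetical client code, not part of this file):

      VexGuestAMD64State st;
      ...
      ULong flags = LibVEX_GuestAMD64_get_rflags(&st);
      LibVEX_GuestAMD64_put_rflags(flags | AMD64G_CC_MASK_C, &st);
      // or, to touch only the carry bit:
      LibVEX_GuestAMD64_put_rflag_c(1, &st);

   Both put routines leave the thunk in AMD64G_CC_OP_COPY form, so a
   subsequent get_rflags reads back exactly the bits that were stored. */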
/*---------------------------------------------------------------*/
/*--- %rflags translation-time function specialisers.         ---*/
/*--- These help iropt specialise calls the above run-time    ---*/
/*--- %rflags functions.                                      ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */

static inline Bool isU64 ( IRExpr* e, ULong n )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U64
          && e->Iex.Const.con->Ico.U64 == n;
}
/* Returns N if W64 is a value of the form 1 << N for N in 1 to 31,
   and zero in any other case. */
static Int isU64_1_shl_N_literal ( ULong w64 )
{
   if (w64 < (1ULL << 1) || w64 > (1ULL << 31))
      return 0;
   if ((w64 & (w64 - 1)) != 0)
      return 0;
   /* At this point, we know w64 is a power of two in the range 2^1 .. 2^31,
      and we only need to find out which one it is. */
   for (Int n = 1; n <= 31; n++) {
      if (w64 == (1ULL << n))
         return n;
   }
   /* Consequently we should never get here. */
   /*UNREACHED*/
   vassert(0);
   return 0;
}

/* Returns N if E is an immediate of the form 1 << N for N in 1 to 31,
   and zero in any other case. */
static Int isU64_1_shl_N ( IRExpr* e )
{
   if (e->tag != Iex_Const || e->Iex.Const.con->tag != Ico_U64)
      return 0;
   ULong w64 = e->Iex.Const.con->Ico.U64;
   return isU64_1_shl_N_literal(w64);
}

/* Returns N if E is an immediate of the form (1 << N) - 1 for N in 1 to 31,
   and zero in any other case. */
static Int isU64_1_shl_N_minus_1 ( IRExpr* e )
{
   if (e->tag != Iex_Const || e->Iex.Const.con->tag != Ico_U64)
      return 0;
   ULong w64 = e->Iex.Const.con->Ico.U64;
   // This isn't actually necessary since isU64_1_shl_N_literal will return
   // zero given a zero argument, but still ..
   if (w64 == 0xFFFFFFFFFFFFFFFFULL)
      return 0;
   return isU64_1_shl_N_literal(w64 + 1);
}
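
/* For example, isU64_1_shl_N applied to an Ico_U64 constant of 0x100
   returns 8, and isU64_1_shl_N_minus_1 applied to 0xFF also returns 8;
   both return 0 for anything not of the required form.  These are exactly
   the immediate shapes that the SUBL B/NB and BE/NBE cases below look
   for. */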
IRExpr* guest_amd64_spechelper ( const HChar* function_name,
                                 IRExpr** args,
                                 IRStmt** precedingStmts,
                                 Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "amd64g_calculate_condition" --------- */

   if (vex_streq(function_name, "amd64g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];
      /*---------------- ADDQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
         /* long long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Add64, cc_dep1, cc_dep2),
                           mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondS)) {
         /* long long add, then S (negative)
            --> ((dst + src) >>u 63) & 1
         */
         return binop(Iop_And64,
                      binop(Iop_Shr64,
                            binop(Iop_Add64, cc_dep1, cc_dep2),
                            mkU8(63)),
                      mkU64(1));
      }

      /*---------------- ADDL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondO)) {
         /* This is very commonly generated by Javascript JITs, for
            the idiom "do a 32-bit add and jump to out-of-line code if
            an overflow occurs". */
         /* long add, then O (overflow)
            --> ((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 + dep2)))[31]
            --> (((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
            --> (((not(dep1 ^ dep2)) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
         */
         vassert(isIRAtom(cc_dep1));
         vassert(isIRAtom(cc_dep2));
         return
            binop(Iop_And64,
                  binop(Iop_Shr64,
                        binop(Iop_And64,
                              unop(Iop_Not64,
                                   binop(Iop_Xor64, cc_dep1, cc_dep2)),
                              binop(Iop_Xor64, cc_dep1,
                                    binop(Iop_Add64, cc_dep1, cc_dep2))),
                        mkU8(31)),
                  mkU64(1));
      }

      if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondZ)) {
         /* long add, then Z --> test ((int)(dst+src) == 0) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ32,
                           unop(Iop_64to32, binop(Iop_Add64, cc_dep1, cc_dep2)),
                           mkU32(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondS)) {
         /* long add, then S (negative)
            --> ((dst +64 src) >>u 31) & 1
            Pointless to narrow the args to 32 bit before the add. */
         return binop(Iop_And64,
                      binop(Iop_Shr64,
                            binop(Iop_Add64, cc_dep1, cc_dep2),
                            mkU8(31)),
                      mkU64(1));
      }

      if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondNS)) {
         /* long add, then NS (not negative)
            --> (dst+src)[31] ^ 1
            --> (((dst +64 src) >>u 31) & 1) ^ 1
            Pointless to narrow the args to 32 bit before the add. */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64,
                                  binop(Iop_Add64, cc_dep1, cc_dep2),
                                  mkU8(31)),
                            mkU64(1)),
                      mkU64(1));
      }

      /*---------------- ADDW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_ADDW) && isU64(cond, AMD64CondZ)) {
         /* word add, then Z --> test ((short)(dst+src) == 0) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ16,
                           unop(Iop_64to16, binop(Iop_Add64, cc_dep1, cc_dep2)),
                           mkU16(0)));
      }

      /*---------------- ADDB ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_ADDB) && isU64(cond, AMD64CondZ)) {
         /* byte add, then Z --> test ((char)(dst+src) == 0) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ8,
                           unop(Iop_64to8, binop(Iop_Add64, cc_dep1, cc_dep2)),
                           mkU8(0)));
      }
      /*---------------- SUBQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondO)) {
         /* long long sub/cmp, then O (overflow)
            --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[63]
            --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2))) >>u 63
         */
         vassert(isIRAtom(cc_dep1));
         vassert(isIRAtom(cc_dep2));
         return binop(Iop_Shr64,
                      binop(Iop_And64,
                            binop(Iop_Xor64, cc_dep1, cc_dep2),
                            binop(Iop_Xor64, cc_dep1,
                                  binop(Iop_Sub64, cc_dep1, cc_dep2))),
                      mkU8(63));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNO)) {
         /* No action.  Never yet found a test case. */
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
         /* long long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
         /* long long sub/cmp, then NB (unsigned greater than or equal)
            --> test src <=u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
         /* long long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
         /* long long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,cc_dep1,cc_dep2));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
         /* long long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNBE)) {
         /* long long sub/cmp, then NBE (unsigned greater than)
            --> test !(dst <=u src) */
         return binop(Iop_Xor64,
                      unop(Iop_1Uto64,
                           binop(Iop_CmpLE64U, cc_dep1, cc_dep2)),
                      mkU64(1));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondS)) {
         /* long long sub/cmp, then S (negative)
            --> (dst-src) >>u 63 */
         return binop(Iop_Shr64,
                      binop(Iop_Sub64, cc_dep1, cc_dep2),
                      mkU8(63));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNS)) {
         /* long long sub/cmp, then NS (not negative)
            --> (dst-src)[63] ^ 1
            --> ((dst-src) >>u 63) ^ 1 */
         return binop(Iop_Xor64,
                      binop(Iop_Shr64,
                            binop(Iop_Sub64, cc_dep1, cc_dep2),
                            mkU8(63)),
                      mkU64(1));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
         /* long long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNL)) {
         /* long long sub/cmp, then NL (signed greater than or equal)
            --> test dst >=s src
            --> test src <=s dst */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondLE)) {
         /* long long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNLE)) {
         /* long long sub/cmp, then NLE (signed greater than)
            --> test !(dst <=s src)
            --> test (dst >s src)
            --> test (src <s dst) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
      }
      /*---------------- SUBL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondO)) {
         /* This is very commonly generated by Javascript JITs, for
            the idiom "do a 32-bit subtract and jump to out-of-line
            code if an overflow occurs". */
         /* long sub/cmp, then O (overflow)
            --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[31]
            --> (((dep1 ^ dep2) & (dep1 ^ (dep1 -64 dep2))) >>u 31) & 1
         */
         vassert(isIRAtom(cc_dep1));
         vassert(isIRAtom(cc_dep2));
         return
            binop(Iop_And64,
                  binop(Iop_Shr64,
                        binop(Iop_And64,
                              binop(Iop_Xor64, cc_dep1, cc_dep2),
                              binop(Iop_Xor64, cc_dep1,
                                    binop(Iop_Sub64, cc_dep1, cc_dep2))),
                        mkU8(31)),
                  mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNO)) {
         /* No action.  Never yet found a test case. */
      }

      /* It appears that LLVM 5.0 and later have a new way to find out
         whether the top N bits of a word W are all zero, by computing

           W  <u   0---(N-1)---0 1 0---0  or
           W  <=u  0---(N-1)---0 0 1---1

         In particular, the result will be defined if the top N bits of W
         are defined, even if the trailing bits -- those corresponding to
         the rightmost 0---0 / 1---1 section -- are undefined.  Rather than
         make Memcheck more complex, we detect this case where we can and
         shift out the irrelevant and potentially undefined bits. */
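      /* Concretely: to ask "are the top 24 bits of %eax zero?", such a
         compiler may emit "cmpl $0x100,%eax ; jb ..." (W <u 1<<8) or
         "cmpl $0xFF,%eax ; jbe ..." (W <=u (1<<8)-1).  Both get rewritten
         below as a test of (W >>u 8) == 0, so the possibly-undefined low 8
         bits never reach the comparison. */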
      Int n = 0;
      Bool is_NB_or_NBE = False;
      if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
         if (isU64(cond, AMD64CondB) || isU64(cond, AMD64CondNB)) {
            /* long sub/cmp, then B (unsigned less than),
               where dep2 is a power of 2:
                 -> CmpLT32U(dep1, 1 << N)
                 -> CmpEQ32(dep1 >>u N, 0)
               and
               long sub/cmp, then NB (unsigned greater than or equal),
               where dep2 is a power of 2:
                 -> CmpGE32U(dep1, 1 << N)
                 -> CmpNE32(dep1 >>u N, 0)
               This avoids CmpLT32U/CmpGE32U being applied to potentially
               uninitialised bits in the area being shifted out. */
            n = isU64_1_shl_N(cc_dep2);
            is_NB_or_NBE = isU64(cond, AMD64CondNB);
         } else if (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE)) {
            /* long sub/cmp, then BE (unsigned less than or equal),
               where dep2 is a power of 2 minus 1:
                 -> CmpLE32U(dep1, (1 << N) - 1)
                 -> CmpEQ32(dep1 >>u N, 0)
               and
               long sub/cmp, then NBE (unsigned greater than),
               where dep2 is a power of 2 minus 1:
                 -> CmpGT32U(dep1, (1 << N) - 1)
                 -> CmpNE32(dep1 >>u N, 0)
               This avoids CmpLE32U/CmpGT32U being applied to potentially
               uninitialised bits in the area being shifted out. */
            n = isU64_1_shl_N_minus_1(cc_dep2);
            is_NB_or_NBE = isU64(cond, AMD64CondNBE);
         }
      }
      if (n > 0) {
         vassert(n >= 1 && n <= 31);
         return unop(Iop_1Uto64,
                     binop(is_NB_or_NBE ? Iop_CmpNE32 : Iop_CmpEQ32,
                           binop(Iop_Shr32, unop(Iop_64to32, cc_dep1),
                                 mkU8(n)),
                           mkU32(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) {
         /* long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32U,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNB)) {
         /* long sub/cmp, then NB (unsigned greater than or equal)
            --> test src <=u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32U,
                           unop(Iop_64to32, cc_dep2),
                           unop(Iop_64to32, cc_dep1)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ32,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE32,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32U,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32U,
                           unop(Iop_64to32, cc_dep2),
                           unop(Iop_64to32, cc_dep1)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
         /* long sub/cmp, then S (negative)
            --> ((dst -64 src) >>u 31) & 1
            Pointless to narrow the args to 32 bit before the subtract. */
         return binop(Iop_And64,
                      binop(Iop_Shr64,
                            binop(Iop_Sub64, cc_dep1, cc_dep2),
                            mkU8(31)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNS)) {
         /* long sub/cmp, then NS (not negative)
            --> (dst-src)[31] ^ 1
            --> (((dst -64 src) >>u 31) & 1) ^ 1
            Pointless to narrow the args to 32 bit before the subtract. */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64,
                                  binop(Iop_Sub64, cc_dep1, cc_dep2),
                                  mkU8(31)),
                            mkU64(1)),
                      mkU64(1));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32S,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNL)) {
         /* long sub/cmp, then NL (signed greater than or equal)
            --> test dst >=s src
            --> test src <=s dst */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32S,
                           unop(Iop_64to32, cc_dep2),
                           unop(Iop_64to32, cc_dep1)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32S,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
         /* long sub/cmp, then NLE (signed greater than)
            --> test !(dst <=s src)
            --> test (dst >s src)
            --> test (src <s dst) */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32S,
                           unop(Iop_64to32, cc_dep2),
                           unop(Iop_64to32, cc_dep1)));
      }
      /*---------------- SUBW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ16,
                           unop(Iop_64to16,cc_dep1),
                           unop(Iop_64to16,cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE16,
                           unop(Iop_64to16,cc_dep1),
                           unop(Iop_64to16,cc_dep2)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondBE)) {
         /* word sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U,
                           binop(Iop_Shl64, cc_dep1, mkU8(48)),
                           binop(Iop_Shl64, cc_dep2, mkU8(48))));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondS)
          && isU64(cc_dep2, 0)) {
         /* word sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (ULong)dst[15]
            This is yet another scheme by which clang figures out if the
            top bit of a word is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU64(cc_dep2, 0) is correct, even though this is
            for an 16-bit comparison, since the args to the helper
            function are always U64s. */
         return binop(Iop_And64,
                      binop(Iop_Shr64,cc_dep1,mkU8(15)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNS)
          && isU64(cc_dep2, 0)) {
         /* word sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (ULong) !dst[15]
         */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64,cc_dep1,mkU8(15)),
                            mkU64(1)),
                      mkU64(1));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
         /* word sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64S,
                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
                           binop(Iop_Shl64,cc_dep2,mkU8(48))));
      }
      /*---------------- SUBB ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondB)) {
         /* byte sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           binop(Iop_And64, cc_dep1, mkU64(0xFF)),
                           binop(Iop_And64, cc_dep2, mkU64(0xFF))));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNB)) {
         /* byte sub/cmp, then NB (unsigned greater than or equal)
            --> test src <=u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U,
                           binop(Iop_And64, cc_dep2, mkU64(0xFF)),
                           binop(Iop_And64, cc_dep1, mkU64(0xFF))));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ8,
                           unop(Iop_64to8,cc_dep1),
                           unop(Iop_64to8,cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE8,
                           unop(Iop_64to8,cc_dep1),
                           unop(Iop_64to8,cc_dep2)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) {
         /* byte sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE64U,
                           binop(Iop_And64, cc_dep1, mkU64(0xFF)),
                           binop(Iop_And64, cc_dep2, mkU64(0xFF))));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
          && isU64(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (ULong)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU64(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U64s. */
         return binop(Iop_And64,
                      binop(Iop_Shr64,cc_dep1,mkU8(7)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
          && isU64(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (ULong) !dst[7]
         */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64,cc_dep1,mkU8(7)),
                            mkU64(1)),
                      mkU64(1));
      }
      /*---------------- LOGICQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
         /* long long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) {
         /* long long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
         /* long long and/or/xor, then L
            LOGIC sets SF and ZF according to the
            result and makes OF be zero.  L computes SF ^ OF, but
            OF is zero, so this reduces to SF -- which will be 1 iff
            the result is < signed 0.  Hence ...
         */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64S,
                           cc_dep1,
                           mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondS)) {
         /* long long and/or/xor, then S --> (ULong)result[63] */
         return binop(Iop_Shr64, cc_dep1, mkU8(63));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNS)) {
         /* long long and/or/xor, then S --> (ULong) ~ result[63] */
         return binop(Iop_Xor64,
                      binop(Iop_Shr64, cc_dep1, mkU8(63)),
                      mkU64(1));
      }
      /*---------------- LOGICL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ32,
                           unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE32,
                           unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLE32S,
                           unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) {
         /* long and/or/xor, then S --> (ULong)result[31] */
         return binop(Iop_And64,
                      binop(Iop_Shr64, cc_dep1, mkU8(31)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) {
         /* long and/or/xor, then S --> (ULong) ~ result[31] */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64, cc_dep1, mkU8(31)),
                            mkU64(1)),
                      mkU64(1));
      }
      /*---------------- LOGICW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondZ)) {
         /* word and/or/xor, then Z --> test dst==0 */
         // Use CmpEQ32 rather than CmpEQ64 here, so that Memcheck instruments
         // it exactly at EdcAUTO.
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ32,
                           unop(Iop_16Uto32, unop(Iop_64to16, cc_dep1)),
                           mkU32(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondNZ)) {
         /* word and/or/xor, then NZ --> test dst!=0 */
         // Use CmpNE32 rather than CmpNE64 here, so that Memcheck instruments
         // it exactly at EdcAUTO.
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE32,
                           unop(Iop_16Uto32, unop(Iop_64to16, cc_dep1)),
                           mkU32(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondS)) {
         /* word and/or/xor, then S --> (ULong)result[15] */
         return binop(Iop_And64,
                      binop(Iop_Shr64, cc_dep1, mkU8(15)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondNS)) {
         /* word and/or/xor, then S --> (ULong) ~ result[15] */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64, cc_dep1, mkU8(15)),
                            mkU64(1)),
                      mkU64(1));
      }
      /*---------------- LOGICB ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         // Use CmpEQ32 rather than CmpEQ64 here, so that Memcheck instruments
         // it exactly at EdcAUTO.
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ32,
                           unop(Iop_8Uto32, unop(Iop_64to8, cc_dep1)),
                           mkU32(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         // Use CmpNE32 rather than CmpNE64 here, so that Memcheck instruments
         // it exactly at EdcAUTO.
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE32,
                           unop(Iop_8Uto32, unop(Iop_64to8, cc_dep1)),
                           mkU32(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And64,
                      binop(Iop_Shr64,cc_dep1,mkU8(7)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) {
         /* byte and/or/xor, then NS --> (UInt)!result[7] */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64,cc_dep1,mkU8(7)),
                            mkU64(1)),
                      mkU64(1));
      }
      /*---------------- INCB ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
         /* 8-bit inc, then LE --> sign bit of the arg */
         return binop(Iop_And64,
                      binop(Iop_Shr64,
                            binop(Iop_Sub64, cc_dep1, mkU64(1)),
                            mkU8(7)),
                      mkU64(1));
      }
      /*---------------- INCW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
         /* 16-bit inc, then Z --> test dst == 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64,
                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
                           mkU64(0)));
      }
      /*---------------- DECL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ32,
                           unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }
      /*---------------- DECW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
         /* 16-bit dec, then NZ --> test dst != 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64,
                           binop(Iop_Shl64,cc_dep1,mkU8(48)),
                           mkU64(0)));
      }
      /*---------------- SHRQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondZ)) {
         /* SHRQ, then Z --> test result[63:0] == 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondNZ)) {
         /* SHRQ, then NZ --> test result[63:0] != 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondS)) {
         /* SHRQ, then S --> (ULong)result[63] (result is in dep1) */
         return binop(Iop_Shr64, cc_dep1, mkU8(63));
      }
      // No known test case for this, hence disabled:
      //if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondNS)) {
      //   /* SHRQ, then NS --> (ULong) ~ result[63] */
      //}
      /*---------------- SHRL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondZ)) {
         /* SHRL, then Z --> test dep1 == 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondNZ)) {
         /* SHRL, then NZ --> test dep1 != 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE32, unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondS)) {
         /* SHRL/SARL, then S --> (ULong)result[31] */
         return binop(Iop_And64,
                      binop(Iop_Shr64, cc_dep1, mkU8(31)),
                      mkU64(1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondNS)) {
         /* SHRL/SARL, then NS --> (ULong) ~ result[31] */
         return binop(Iop_Xor64,
                      binop(Iop_And64,
                            binop(Iop_Shr64, cc_dep1, mkU8(31)),
                            mkU64(1)),
                      mkU64(1));
      }
      /*---------------- SHRW ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SHRW) && isU64(cond, AMD64CondZ)) {
         /* SHRW, then Z --> test dep1 == 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ32,
                           unop(Iop_16Uto32, unop(Iop_64to16, cc_dep1)),
                           mkU32(0)));
      }
      // No known test case for this, hence disabled:
      //if (isU64(cc_op, AMD64G_CC_OP_SHRW) && isU64(cond, AMD64CondNZ)) {
      //   /* SHRW, then NZ --> test dep1 == 0 */
      //   return unop(Iop_1Uto64,
      //               binop(Iop_CmpNE32,
      //                     unop(Iop_16Uto32, unop(Iop_64to16, cc_dep1)),
      //                     mkU32(0)));
      //}
      /*---------------- SHLQ ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondZ)) {
         /* SHLQ, then Z --> test dep1 == 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondNZ)) {
         /* SHLQ, then NZ --> test dep1 != 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondS)) {
         /* SHLQ, then S --> (ULong)result[63] */
         return binop(Iop_Shr64, cc_dep1, mkU8(63));
      }
      // No known test case
      //if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondNS)) {
      //   /* SHLQ, then NS --> (ULong) ~ result[63] */
      //}
      /*---------------- SHLL ----------------*/

      if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondZ)) {
         /* SHLL, then Z --> test result[31:0] == 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondNZ)) {
         /* SHLL, then NZ --> test dep1 != 0 */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpNE32, unop(Iop_64to32, cc_dep1),
                           mkU32(0)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondS)) {
         /* SHLL, then S --> (ULong)result[31] */
         return binop(Iop_And64,
                      binop(Iop_Shr64, cc_dep1, mkU8(31)),
                      mkU64(1));
      }
      // No known test case
      //if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondNS)) {
      //   /* SHLL, then NS --> (ULong) ~ result[31] */
      //}
      /*---------------- COPY ----------------*/
      /* This can happen, as a result of amd64 FP compares: "comisd ... ;
         jbe" for example. */

      if (isU64(cc_op, AMD64G_CC_OP_COPY)
          && (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test
            (C or Z == 1). */
         /* COPY, then NBE --> extract C and Z from dep1, and test
            (C or Z == 0). */
         ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
         return
            unop(Iop_1Uto64,
                 binop(Iop_CmpEQ64,
                       binop(Iop_And64,
                             binop(Iop_Or64,
                                   binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
                                   binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))),
                             mkU64(1)),
                       mkU64(nnn)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_COPY)
          && (isU64(cond, AMD64CondB) || isU64(cond, AMD64CondNB))) {
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
         /* COPY, then NB --> extract C from dep1, and test (C == 0). */
         ULong nnn = isU64(cond, AMD64CondB) ? 1 : 0;
         return
            unop(Iop_1Uto64,
                 binop(Iop_CmpEQ64,
                       binop(Iop_And64,
                             binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
                             mkU64(1)),
                       mkU64(nnn)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_COPY)
          && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         ULong nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
         return
            unop(Iop_1Uto64,
                 binop(Iop_CmpEQ64,
                       binop(Iop_And64,
                             binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
                             mkU64(1)),
                       mkU64(nnn)));
      }

      if (isU64(cc_op, AMD64G_CC_OP_COPY)
          && (isU64(cond, AMD64CondP) || isU64(cond, AMD64CondNP))) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         /* COPY, then NP --> extract P from dep1, and test (P == 0). */
         ULong nnn = isU64(cond, AMD64CondP) ? 1 : 0;
         return
            unop(Iop_1Uto64,
                 binop(Iop_CmpEQ64,
                       binop(Iop_And64,
                             binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
                             mkU64(1)),
                       mkU64(nnn)));
      }

      if (cond->tag == Iex_Const && cc_op->tag == Iex_Const) {
         vex_printf("spec request failed: ");
         vex_printf("   %s  ", function_name);
         for (i = 0; i < 2/*arity*/; i++) {
            vex_printf("  ");
            ppIRExpr(args[i]);
         }
         vex_printf("\n");
      }

      return NULL;
   }
   /* --------- specialising "amd64g_calculate_rflags_c" --------- */

   if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
      /* specialise calls to above "calculate_rflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           cc_dep1,
                           cc_dep2));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32U,
                           unop(Iop_64to32, cc_dep1),
                           unop(Iop_64to32, cc_dep2)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBW)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           binop(Iop_And64,cc_dep1,mkU64(0xFFFF)),
                           binop(Iop_And64,cc_dep2,mkU64(0xFFFF))));
      }
      if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           binop(Iop_And64,cc_dep1,mkU64(0xFF)),
                           binop(Iop_And64,cc_dep2,mkU64(0xFF))));
      }
      if (isU64(cc_op, AMD64G_CC_OP_ADDQ)) {
         /* C after add denotes sum <u either arg */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT64U,
                           binop(Iop_Add64, cc_dep1, cc_dep2),
                           cc_dep1));
      }
      if (isU64(cc_op, AMD64G_CC_OP_ADDL)) {
         /* C after add denotes sum <u either arg */
         return unop(Iop_1Uto64,
                     binop(Iop_CmpLT32U,
                           unop(Iop_64to32, binop(Iop_Add64, cc_dep1, cc_dep2)),
                           unop(Iop_64to32, cc_dep1)));
      }
      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
          || isU64(cc_op, AMD64G_CC_OP_LOGICL)
          || isU64(cc_op, AMD64G_CC_OP_LOGICW)
          || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU64(0);
      }
      if (isU64(cc_op, AMD64G_CC_OP_DECL)
          || isU64(cc_op, AMD64G_CC_OP_INCL)
          || isU64(cc_op, AMD64G_CC_OP_DECQ)
          || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return cc_ndep;
      }

#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("spec request failed: ");
         vex_printf("   %s  ", function_name);
         for (i = 0; i < 2/*arity*/; i++) {
            vex_printf("  ");
            ppIRExpr(args[i]);
         }
         vex_printf("\n");
      }
#     endif
   }

   return NULL;
}
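
/* The SUBx cases just above lean on the identity that the carry flag
   produced by x - y is exactly the unsigned comparison x <u y, which is
   why the specialiser can emit Iop_CmpLT64U/Iop_CmpLT32U instead of a
   flag recomputation.  A minimal standalone sketch of that identity;
   this function is invented for the example and is not used by VEX. */
static inline Bool example_sub_borrow_equals_ltu ( ULong x, ULong y )
{
   /* Borrow out of a 64-bit subtract, computed directly: the result
      wraps around (becomes larger than x) exactly when a borrow occurs. */
   ULong diff   = x - y;
   Bool  borrow = toBool(diff > x);
   /* ... which is the same predicate the specialiser emits. */
   Bool  ltu    = toBool(x < y);
   return toBool(borrow == ltu);   /* always True */
}
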
/*---------------------------------------------------------------*/
/*--- Supporting functions for x87 FPU activities.            ---*/
/*---------------------------------------------------------------*/

static inline Bool host_is_little_endian ( void )
{
   UInt x = 0x76543210;
   UChar* p = (UChar*)(&x);
   return toBool(*p == 0x10);
}
/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl )
{
   Bool   mantissaIsZero;
   Int    bexp;
   UChar  sign;
   UChar* f64;

   vassert(host_is_little_endian());

   /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */

   f64  = (UChar*)(&dbl);
   sign = toUChar( (f64[7] >> 7) & 1 );

   /* First off, if the tag indicates the register was empty,
      return 1,0,sign,1 */
   if (tag == 0) {
      /* vex_printf("Empty\n"); */
      return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1)
                               | AMD64G_FC_MASK_C0;
   }

   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   bexp &= 0x7FF;

   mantissaIsZero
      = toBool(
           (f64[6] & 0x0F) == 0
           && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
        );

   /* If both exponent and mantissa are zero, the value is zero.
      Return 1,0,sign,0. */
   if (bexp == 0 && mantissaIsZero) {
      /* vex_printf("Zero\n"); */
      return AMD64G_FC_MASK_C3 | 0
                               | (sign << AMD64G_FC_SHIFT_C1) | 0;
   }

   /* If exponent is zero but mantissa isn't, it's a denormal.
      Return 1,1,sign,0. */
   if (bexp == 0 && !mantissaIsZero) {
      /* vex_printf("Denormal\n"); */
      return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2
                               | (sign << AMD64G_FC_SHIFT_C1) | 0;
   }

   /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
      Return 0,1,sign,1. */
   if (bexp == 0x7FF && mantissaIsZero) {
      /* vex_printf("Inf\n"); */
      return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1)
               | AMD64G_FC_MASK_C0;
   }

   /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
      Return 0,0,sign,1. */
   if (bexp == 0x7FF && !mantissaIsZero) {
      /* vex_printf("NaN\n"); */
      return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
   }

   /* Uh, ok, we give up.  It must be a normal finite number.
      Return 0,1,sign,0. */
   /* vex_printf("normal\n"); */
   return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
}
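
/* The FXAM result above is determined entirely by the biased exponent and
   by whether the mantissa is zero.  Purely as an illustration, here is the
   same classification over the raw bits of an IEEE754 double, without the
   tag handling or the C3..C0 packing.  This sketch is not used by VEX; the
   enum and function name are invented for the example. */
typedef enum { Ex_ZERO, Ex_DENORMAL, Ex_INF, Ex_NAN, Ex_NORMAL }
        ExampleFPClass;

static inline ExampleFPClass example_classify_f64bits ( ULong bits )
{
   Int  bexp           = (Int)((bits >> 52) & 0x7FF);         /* biased exponent */
   Bool mantissaIsZero = toBool((bits & 0xFFFFFFFFFFFFFULL) == 0);
   if (bexp == 0     &&  mantissaIsZero) return Ex_ZERO;
   if (bexp == 0     && !mantissaIsZero) return Ex_DENORMAL;
   if (bexp == 0x7FF &&  mantissaIsZero) return Ex_INF;
   if (bexp == 0x7FF && !mantissaIsZero) return Ex_NAN;
   return Ex_NORMAL;
}
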
/* This is used to implement both 'frstor' and 'fldenv'.  The latter
   appears to differ from the former only in that the 8 FP registers
   themselves are not transferred into the guest state. */
static
VexEmNote do_put_x87 ( Bool moveRegs,
                       /*IN*/Fpu_State* x87_state,
                       /*OUT*/VexGuestAMD64State* vex_state )
{
   Int        stno, preg;
   UInt       tag;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   UInt       ftop    = (x87_state->env[FP_ENV_STAT] >> 11) & 7;
   UInt       tagw    = x87_state->env[FP_ENV_TAG];
   UInt       fpucw   = x87_state->env[FP_ENV_CTRL];
   UInt       c3210   = x87_state->env[FP_ENV_STAT] & 0x4700;
   VexEmNote  ew;
   UInt       fpround;
   ULong      pair;

   /* Copy registers and tags */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?  Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         if (moveRegs)
            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         if (moveRegs)
            convert_f80le_to_f64le( &x87_state->reg[10*stno],
                                    (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings. */
   pair    = amd64g_check_fldcw ( (ULong)fpucw );
   fpround = (UInt)pair & 0xFFFFFFFFULL;
   ew      = (VexEmNote)(pair >> 32);

   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}
/* Create an x87 FPU state from the guest state, as close as
   we can approximate it. */
static
void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
                  /*OUT*/Fpu_State* x87_state )
{
   Int        i, stno, preg;
   UInt       tagw;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   UInt       ftop    = vex_state->guest_FTOP;
   UInt       c3210   = vex_state->guest_FC3210;

   for (i = 0; i < 14; i++)
      x87_state->env[i] = 0;

   x87_state->env[1] = x87_state->env[3] = x87_state->env[5]
      = x87_state->env[13] = 0xFFFF;
   x87_state->env[FP_ENV_STAT]
      = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   x87_state->env[FP_ENV_CTRL]
      = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));

   /* Dump the register stack in ST order. */
   tagw = 0;
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      if (vexTags[preg] == 0) {
         /* register is empty */
         tagw |= (3 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87_state->reg[10*stno] );
      } else {
         /* register is full. */
         tagw |= (0 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87_state->reg[10*stno] );
      }
   }
   x87_state->env[FP_ENV_TAG] = toUShort(tagw);
}
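
/* Both do_put_x87 and do_get_x87 above keep translating between
   stack-relative register numbers ST(stno) and physical register numbers,
   and between VEX's one-byte-per-register FPTAG array and the hardware's
   packed 2-bits-per-register tag word.  A standalone sketch of just those
   two mappings; the names are invented for the example and the functions
   are not used by VEX. */
static inline UInt example_st_to_phys ( UInt stno, UInt ftop )
{
   /* ST(stno) lives in physical register (stno + ftop) mod 8. */
   return (stno + ftop) & 7;
}

static inline UInt example_pack_tagw ( const UChar* vexTags, UInt ftop )
{
   /* Build an x87 tag word: 2 bits per *physical* register,
      11 = empty, 00 = valid (the only precision VEX tracks). */
   UInt stno, preg, tagw = 0;
   for (stno = 0; stno < 8; stno++) {
      preg = example_st_to_phys(stno, ftop);
      if (vexTags[preg] == 0)
         tagw |= (3u << (2*preg));   /* empty */
      /* else leave the field as 00 == valid */
   }
   return tagw;
}
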
/*---------------------------------------------------------------*/
/*--- Supporting functions for XSAVE/FXSAVE.                  ---*/
/*---------------------------------------------------------------*/

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
/* XSAVE component 0 is the x87 FPU state. */
void amd64g_dirtyhelper_XSAVE_COMPONENT_0
        ( VexGuestAMD64State* gst, HWord addr )
{
   /* Derived from values obtained from
      vendor_id       : AuthenticAMD
      model name      : AMD Athlon(tm) 64 Processor 3200+
   */
   /* Somewhat roundabout, but at least it's simple. */
   Fpu_State tmp;
   UShort*   addrS = (UShort*)addr;
   UChar*    addrC = (UChar*)addr;
   UInt      fp_tags;
   UInt      summary_tags;
   Int       r, stno;
   UShort    *srcS, *dstS;

   do_get_x87( gst, &tmp );

   /* Now build the proper fxsave x87 image from the fsave x87 image
      we just made. */

   addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FCW: fpu status word */

   /* set addrS[2] in an endian-independent way */
   summary_tags = 0;
   fp_tags = tmp.env[FP_ENV_TAG];
   for (r = 0; r < 8; r++) {
      if ( ((fp_tags >> (2*r)) & 3) != 3 )
         summary_tags |= (1 << r);
   }
   addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
   addrC[5]  = 0; /* pad */

   /* FOP: faulting fpu opcode.  From experimentation, the real CPU
      does not write this field. (?!) */
   addrS[3]  = 0; /* BOGUS */

   /* RIP (Last x87 instruction pointer).  From experimentation, the
      real CPU does not write this field. (?!) */
   addrS[4]  = 0; /* BOGUS */
   addrS[5]  = 0; /* BOGUS */
   addrS[6]  = 0; /* BOGUS */
   addrS[7]  = 0; /* BOGUS */

   /* RDP (Last x87 data pointer).  From experimentation, the real CPU
      does not write this field. (?!) */
   addrS[8]  = 0; /* BOGUS */
   addrS[9]  = 0; /* BOGUS */
   addrS[10] = 0; /* BOGUS */
   addrS[11] = 0; /* BOGUS */

   /* addrS[13,12] are MXCSR -- not written */
   /* addrS[15,14] are MXCSR_MASK -- not written */

   /* Copy in the FP registers, in ST order. */
   for (stno = 0; stno < 8; stno++) {
      srcS = (UShort*)(&tmp.reg[10*stno]);
      dstS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
      dstS[5] = 0;
      dstS[6] = 0;
      dstS[7] = 0;
   }
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
/* XSAVE component 1 is the SSE state. */
void amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
        ( VexGuestAMD64State* gst, HWord addr )
{
   UShort* addrS = (UShort*)addr;
   UInt    mxcsr;

   /* The only non-register parts of the SSE state are MXCSR and
      MXCSR_MASK. */
   mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );

   addrS[12] = toUShort(mxcsr);  /* MXCSR */
   addrS[13] = toUShort(mxcsr >> 16);

   addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
   addrS[15] = 0x0000; /* MXCSR mask (hi16) */
}
/* VISIBLE TO LIBVEX CLIENT */
/* Do FXSAVE from the supplied VexGuestAMD64State structure and store
   the result at the given address, which must be large enough to hold
   the FXSAVE image written here.

   This function is not called from generated code.  FXSAVE is dealt
   with by the amd64 front end by calling the XSAVE_COMPONENT_{0,1}
   functions above plus some in-line IR.  This function is merely a
   convenience function for VEX's users.
*/
void LibVEX_GuestAMD64_fxsave ( /*IN*/VexGuestAMD64State* gst,
                                /*OUT*/HWord fp_state )
{
   /* Do the x87 part */
   amd64g_dirtyhelper_XSAVE_COMPONENT_0(gst, fp_state);

   /* And now the SSE part, except for the registers themselves. */
   amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS(gst, fp_state);

   /* That's the first 160 bytes of the image done. */
   /* Now only %xmm0 .. %xmm15 remain to be copied.  If the host is
      big-endian, these need to be byte-swapped. */
   U128 *xmm = (U128 *)(fp_state + 160);
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( xmm[0],  gst->guest_YMM0 );
   COPY_U128( xmm[1],  gst->guest_YMM1 );
   COPY_U128( xmm[2],  gst->guest_YMM2 );
   COPY_U128( xmm[3],  gst->guest_YMM3 );
   COPY_U128( xmm[4],  gst->guest_YMM4 );
   COPY_U128( xmm[5],  gst->guest_YMM5 );
   COPY_U128( xmm[6],  gst->guest_YMM6 );
   COPY_U128( xmm[7],  gst->guest_YMM7 );
   COPY_U128( xmm[8],  gst->guest_YMM8 );
   COPY_U128( xmm[9],  gst->guest_YMM9 );
   COPY_U128( xmm[10], gst->guest_YMM10 );
   COPY_U128( xmm[11], gst->guest_YMM11 );
   COPY_U128( xmm[12], gst->guest_YMM12 );
   COPY_U128( xmm[13], gst->guest_YMM13 );
   COPY_U128( xmm[14], gst->guest_YMM14 );
   COPY_U128( xmm[15], gst->guest_YMM15 );

#  undef COPY_U128
}
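
/* A hedged usage sketch for the convenience function above: the caller
   supplies a guest state and a destination buffer.  512 bytes is the
   architectural FXSAVE area size (this helper fills only the x87/SSE
   portion of it), and the 16-byte alignment mirrors what the real FXSAVE
   instruction requires.  The function and buffer here are invented for
   the example and are not part of VEX. */
static inline void example_fxsave_into_buffer ( VexGuestAMD64State* gst )
{
   __attribute__((aligned(16))) UChar fpImage[512];
   UInt i;
   for (i = 0; i < sizeof(fpImage); i++)
      fpImage[i] = 0;
   LibVEX_GuestAMD64_fxsave( gst, (HWord)&fpImage[0] );
   /* fpImage[0..1] now holds FCW, fpImage[2..3] FSW, and the XMM
      registers start at offset 160, as laid out above. */
}
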
/*---------------------------------------------------------------*/
/*--- Supporting functions for XRSTOR/FXRSTOR.                ---*/
/*---------------------------------------------------------------*/

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_0
             ( VexGuestAMD64State* gst, HWord addr )
{
   Fpu_State tmp;
   UShort*   addrS = (UShort*)addr;
   UChar*    addrC = (UChar*)addr;
   UInt      fp_tags;
   Int       r, stno, i;

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */
   for (i = 0; i < 14; i++) tmp.env[i] = 0;
   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..7] */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FCW: fpu status word */

   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* EMPTY */
      else
         fp_tags |= (3 << (2*r)); /* VALID -- not really precise enough. */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state. */
   VexEmNote warnX87 = do_put_x87( True/*moveRegs*/, &tmp, gst );

   return warnX87;
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
             ( VexGuestAMD64State* gst, HWord addr )
{
   UShort* addrS = (UShort*)addr;
   UInt    w32   = (((UInt)addrS[12]) & 0xFFFF)
                   | ((((UInt)addrS[13]) & 0xFFFF) << 16);
   ULong   w64   = amd64g_check_ldmxcsr( (ULong)w32 );

   VexEmNote warnXMM = (VexEmNote)(w64 >> 32);

   gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
   return warnXMM;
}
/* VISIBLE TO LIBVEX CLIENT */
/* Do FXRSTOR from the supplied address and store read values to the given
   VexGuestAMD64State structure.

   This function is not called from generated code.  FXRSTOR is dealt
   with by the amd64 front end by calling the XRSTOR_COMPONENT_{0,1}
   functions above plus some in-line IR.  This function is merely a
   convenience function for VEX's users.
*/
VexEmNote LibVEX_GuestAMD64_fxrstor ( /*IN*/HWord fp_state,
                                      /*MOD*/VexGuestAMD64State* gst )
{
   /* Restore %xmm0 .. %xmm15.  If the host is big-endian, these need
      to be byte-swapped. */
   U128 *xmm = (U128 *)(fp_state + 160);

   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( gst->guest_YMM0,  xmm[0] );
   COPY_U128( gst->guest_YMM1,  xmm[1] );
   COPY_U128( gst->guest_YMM2,  xmm[2] );
   COPY_U128( gst->guest_YMM3,  xmm[3] );
   COPY_U128( gst->guest_YMM4,  xmm[4] );
   COPY_U128( gst->guest_YMM5,  xmm[5] );
   COPY_U128( gst->guest_YMM6,  xmm[6] );
   COPY_U128( gst->guest_YMM7,  xmm[7] );
   COPY_U128( gst->guest_YMM8,  xmm[8] );
   COPY_U128( gst->guest_YMM9,  xmm[9] );
   COPY_U128( gst->guest_YMM10, xmm[10] );
   COPY_U128( gst->guest_YMM11, xmm[11] );
   COPY_U128( gst->guest_YMM12, xmm[12] );
   COPY_U128( gst->guest_YMM13, xmm[13] );
   COPY_U128( gst->guest_YMM14, xmm[14] );
   COPY_U128( gst->guest_YMM15, xmm[15] );

#  undef COPY_U128

   VexEmNote warnXMM
      = amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS(gst, fp_state);
   VexEmNote warnX87
      = amd64g_dirtyhelper_XRSTOR_COMPONENT_0(gst, fp_state);

   /* Prefer an X87 emwarn over an XMM one, if both exist. */
   if (warnX87 != EmNote_NONE)
      return warnX87;
   else
      return warnXMM;
}
/*---------------------------------------------------------------*/
/*--- Supporting functions for FSAVE/FRSTOR                   ---*/
/*---------------------------------------------------------------*/

/* DIRTY HELPER (writes guest state) */
/* Initialise the x87 FPU state as per 'finit'. */
void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
{
   Int i;
   gst->guest_FTOP = 0;
   for (i = 0; i < 8; i++) {
      gst->guest_FPTAG[i] = 0; /* empty */
      gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
   }
   gst->guest_FPROUND = (ULong)Irrm_NEAREST;
   gst->guest_FC3210  = 0;
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest memory) */
ULong amd64g_dirtyhelper_loadF80le ( Addr addrU )
{
   ULong f64;
   convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
   return f64;
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest memory) */
void amd64g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
{
   convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
}
/* CALLED FROM GENERATED CODE */
/* CLEAN HELPER */
/* mxcsr[15:0] contains a SSE native format MXCSR value.
   Extract from it the required SSEROUND value and any resulting
   emulation warning, and return (warn << 32) | sseround value.
*/
ULong amd64g_check_ldmxcsr ( ULong mxcsr )
{
   /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   ULong rmode = (mxcsr >> 13) & 3;

   /* Detect any required emulation warnings. */
   VexEmNote ew = EmNote_NONE;

   if ((mxcsr & 0x1F80) != 0x1F80) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_sseExns;
   }
   else
   if (mxcsr & (1<<15)) {
      /* FZ is set */
      ew = EmWarn_X86_fz;
   }
   else
   if (mxcsr & (1<<6)) {
      /* DAZ is set */
      ew = EmWarn_X86_daz;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}

/* CALLED FROM GENERATED CODE */
/* CLEAN HELPER */
/* Given sseround as an IRRoundingMode value, create a suitable SSE
   native format MXCSR value. */
ULong amd64g_create_mxcsr ( ULong sseround )
{
   sseround &= 3;
   return 0x1F80 | (sseround << 13);
}
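
/* amd64g_check_ldmxcsr/amd64g_create_mxcsr above agree that the rounding
   mode sits in MXCSR bits 14:13 (encoded as per IRRoundingMode) and that
   all six exception-mask bits, 0x1F80, stay set.  A standalone sketch of
   just that packing and unpacking; names are invented for the example and
   the functions are not used by VEX. */
static inline ULong example_mxcsr_pack_rmode ( ULong rmode /* 0 .. 3 */ )
{
   return 0x1F80 | ((rmode & 3) << 13);
}

static inline ULong example_mxcsr_unpack_rmode ( ULong mxcsr )
{
   return (mxcsr >> 13) & 3;
}
/* Round trip: example_mxcsr_unpack_rmode(example_mxcsr_pack_rmode(r)) == r
   for any r in 0..3. */
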
/* CALLED FROM GENERATED CODE */
/* CLEAN HELPER */
/* fpucw[15:0] contains a x87 native format FPU control word.
   Extract from it the required FPROUND value and any resulting
   emulation warning, and return (warn << 32) | fpround value.
*/
ULong amd64g_check_fldcw ( ULong fpucw )
{
   /* Decide on a rounding mode.  fpucw[11:10] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   ULong rmode = (fpucw >> 10) & 3;

   /* Detect any required emulation warnings. */
   VexEmNote ew = EmNote_NONE;

   if ((fpucw & 0x3F) != 0x3F) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_x87exns;
   }
   else
   if (((fpucw >> 8) & 3) != 3) {
      /* unsupported precision */
      ew = EmWarn_X86_x87precision;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}

/* CALLED FROM GENERATED CODE */
/* CLEAN HELPER */
/* Given fpround as an IRRoundingMode value, create a suitable x87
   native format FPU control word. */
ULong amd64g_create_fpucw ( ULong fpround )
{
   fpround &= 3;
   return 0x037F | (fpround << 10);
}
/* This is used to implement 'fldenv'.
   Reads 28 bytes at x87_state[0 .. 27]. */
/* CALLED FROM GENERATED CODE */
VexEmNote amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state,
                                      /*IN*/HWord x87_state )
{
   return do_put_x87( False, (Fpu_State*)x87_state, vex_state );
}
/* CALLED FROM GENERATED CODE */
/* Create an x87 FPU env from the guest state, as close as we can
   approximate it.  Writes 28 bytes at x87_state[0..27]. */
void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state,
                                 /*OUT*/HWord x87_state )
{
   Int        i, stno, preg;
   UInt       tagw;
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State* x87     = (Fpu_State*)x87_state;
   UInt       ftop    = vex_state->guest_FTOP;
   ULong      c3210   = vex_state->guest_FC3210;

   for (i = 0; i < 14; i++)
      x87->env[i] = 0;

   x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
   x87->env[FP_ENV_STAT]
      = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) ));
   x87->env[FP_ENV_CTRL]
      = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) ));

   /* Compute the x87 tag word. */
   tagw = 0;
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      if (vexTags[preg] == 0) {
         /* register is empty */
         tagw |= (3 << (2*preg));
      } else {
         /* register is full. */
         tagw |= (0 << (2*preg));
      }
   }
   x87->env[FP_ENV_TAG] = toUShort(tagw);

   /* We don't dump the x87 registers, tho. */
}
/* This is used to implement 'fnsave'.
   Writes 108 bytes at x87_state[0 .. 107]. */
/* CALLED FROM GENERATED CODE */
void amd64g_dirtyhelper_FNSAVE ( /*IN*/VexGuestAMD64State* vex_state,
                                 /*OUT*/HWord x87_state )
{
   do_get_x87( vex_state, (Fpu_State*)x87_state );
}
/* This is used to implement 'fnsaves'.
   Writes 94 bytes at x87_state[0 .. 93]. */
/* CALLED FROM GENERATED CODE */
void amd64g_dirtyhelper_FNSAVES ( /*IN*/VexGuestAMD64State* vex_state,
                                  /*OUT*/HWord x87_state )
{
   Int           i, stno, preg;
   UInt          tagw;
   ULong*        vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*        vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State_16* x87     = (Fpu_State_16*)x87_state;
   UInt          ftop    = vex_state->guest_FTOP;
   UInt          c3210   = vex_state->guest_FC3210;

   for (i = 0; i < 7; i++)
      x87->env[i] = 0;

   x87->env[FPS_ENV_STAT]
      = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   x87->env[FPS_ENV_CTRL]
      = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));

   /* Dump the register stack in ST order. */
   tagw = 0;
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      if (vexTags[preg] == 0) {
         /* register is empty */
         tagw |= (3 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87->reg[10*stno] );
      } else {
         /* register is full. */
         tagw |= (0 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87->reg[10*stno] );
      }
   }
   x87->env[FPS_ENV_TAG] = toUShort(tagw);
}
/* This is used to implement 'frstor'.
   Reads 108 bytes at x87_state[0 .. 107]. */
/* CALLED FROM GENERATED CODE */
VexEmNote amd64g_dirtyhelper_FRSTOR ( /*OUT*/VexGuestAMD64State* vex_state,
                                      /*IN*/HWord x87_state )
{
   return do_put_x87( True, (Fpu_State*)x87_state, vex_state );
}
/* This is used to implement 'frstors'.
   Reads 94 bytes at x87_state[0 .. 93]. */
/* CALLED FROM GENERATED CODE */
VexEmNote amd64g_dirtyhelper_FRSTORS ( /*OUT*/VexGuestAMD64State* vex_state,
                                       /*IN*/HWord x87_state )
{
   Int           stno, preg;
   UInt          tag;
   ULong*        vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*        vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   Fpu_State_16* x87     = (Fpu_State_16*)x87_state;
   UInt          ftop    = (x87->env[FPS_ENV_STAT] >> 11) & 7;
   UInt          tagw    = x87->env[FPS_ENV_TAG];
   UInt          fpucw   = x87->env[FPS_ENV_CTRL];
   UInt          c3210   = x87->env[FPS_ENV_STAT] & 0x4700;
   VexEmNote     ew;
   UInt          fpround;
   ULong         pair;

   /* Copy registers and tags */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?  Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         convert_f80le_to_f64le( &x87->reg[10*stno],
                                 (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings. */
   pair    = amd64g_check_fldcw ( (ULong)fpucw );
   fpround = (UInt)pair & 0xFFFFFFFFULL;
   ew      = (VexEmNote)(pair >> 32);

   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}
/*---------------------------------------------------------------*/
/*--- CPUID helpers.                                          ---*/
/*---------------------------------------------------------------*/
2957 /* Claim to be the following CPU, which is probably representative of
2958 the lowliest (earliest) amd64 offerings. It can do neither sse3
2961 vendor_id : AuthenticAMD
2964 model name : AMD Opteron (tm) Processor 848
2967 cache size : 1024 KB
2972 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2973 mtrr pge mca cmov pat pse36 clflush mmx fxsr
2974 sse sse2 syscall nx mmxext lm 3dnowext 3dnow
2976 TLB size : 1088 4K pages
2978 cache_alignment : 64
2979 address sizes : 40 bits physical, 48 bits virtual
2980 power management: ts fid vid ttp
2982 2012-Feb-21: don't claim 3dnow or 3dnowext, since in fact
2983 we don't support them. See #291568. 3dnow is 80000001.EDX.31
2984 and 3dnowext is 80000001.EDX.30.
2986 void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State
* st
)
2988 # define SET_ABCD(_a,_b,_c,_d) \
2989 do { st->guest_RAX = (ULong)(_a); \
2990 st->guest_RBX = (ULong)(_b); \
2991 st->guest_RCX = (ULong)(_c); \
2992 st->guest_RDX = (ULong)(_d); \
2995 switch (0xFFFFFFFF & st
->guest_RAX
) {
2997 SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
3000 SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
3003 SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
3006 /* Don't claim to support 3dnow or 3dnowext. 0xe1d3fbff is
3007 the original it-is-supported value that the h/w provides.
3009 SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, /*0xe1d3fbff*/
3013 SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
3016 SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
3019 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3022 SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
3025 SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
3028 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
3031 SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
3034 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3041 /* Claim to be the following CPU (2 x ...), which is sse3 and cx16
3044 vendor_id : GenuineIntel
3047 model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
3050 cache size : 4096 KB
3059 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
3060 mtrr pge mca cmov pat pse36 clflush dts acpi
3061 mmx fxsr sse sse2 ss ht tm syscall nx lm
3062 constant_tsc pni monitor ds_cpl vmx est tm2
3066 cache_alignment : 64
3067 address sizes : 36 bits physical, 48 bits virtual
3070 void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State
* st
)
3072 # define SET_ABCD(_a,_b,_c,_d) \
3073 do { st->guest_RAX = (ULong)(_a); \
3074 st->guest_RBX = (ULong)(_b); \
3075 st->guest_RCX = (ULong)(_c); \
3076 st->guest_RDX = (ULong)(_d); \
3079 switch (0xFFFFFFFF & st
->guest_RAX
) {
3081 SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
3084 SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
3087 SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
3090 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3093 switch (0xFFFFFFFF & st
->guest_RCX
) {
3094 case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
3095 0x0000003f, 0x00000001); break;
3096 case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
3097 0x0000003f, 0x00000001); break;
3098 case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
3099 0x00000fff, 0x00000001); break;
3100 default: SET_ABCD(0x00000000, 0x00000000,
3101 0x00000000, 0x00000000); break;
3106 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
3109 SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
3112 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3115 SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
3118 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3121 unhandled_eax_value
:
3122 SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
3125 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3128 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
3131 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
3134 SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
3137 SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
3140 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3143 SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
3146 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3149 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
3152 goto unhandled_eax_value
;
3158 /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16
3161 vendor_id : GenuineIntel
3164 model name : Intel(R) Core(TM) i5 CPU 670 @ 3.47GHz
3167 cache size : 4096 KB
3178 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
3179 mtrr pge mca cmov pat pse36 clflush dts acpi
3180 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
3181 lm constant_tsc arch_perfmon pebs bts rep_good
3182 xtopology nonstop_tsc aperfmperf pni pclmulqdq
3183 dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16
3184 xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida
3185 arat tpr_shadow vnmi flexpriority ept vpid
3188 cache_alignment : 64
3189 address sizes : 36 bits physical, 48 bits virtual
3192 void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State
* st
)
3194 # define SET_ABCD(_a,_b,_c,_d) \
3195 do { st->guest_RAX = (ULong)(_a); \
3196 st->guest_RBX = (ULong)(_b); \
3197 st->guest_RCX = (ULong)(_c); \
3198 st->guest_RDX = (ULong)(_d); \
3201 UInt old_eax
= (UInt
)st
->guest_RAX
;
3202 UInt old_ecx
= (UInt
)st
->guest_RCX
;
3206 SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69);
3209 SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff, 0xbfebfbff);
3212 SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c);
3215 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3219 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
3220 0x0000003f, 0x00000000); break;
3221 case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f,
3222 0x0000007f, 0x00000000); break;
3223 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
3224 0x000001ff, 0x00000000); break;
3225 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
3226 0x00000fff, 0x00000002); break;
3227 default: SET_ABCD(0x00000000, 0x00000000,
3228 0x00000000, 0x00000000); break;
3232 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
3235 SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000);
3238 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3241 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3244 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3247 SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603);
3252 SET_ABCD(0x00000001, 0x00000002,
3253 0x00000100, 0x00000000); break;
3255 SET_ABCD(0x00000004, 0x00000004,
3256 0x00000201, 0x00000000); break;
3258 SET_ABCD(0x00000000, 0x00000000,
3259 old_ecx
, 0x00000000); break;
3263 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
3267 case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
3268 0x00000100, 0x00000000); break;
3269 case 0x00000001: SET_ABCD(0x00000004, 0x00000004,
3270 0x00000201, 0x00000000); break;
3271 default: SET_ABCD(0x00000000, 0x00000000,
3272 old_ecx
, 0x00000000); break;
3276 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3279 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
3282 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
3285 SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020);
3288 SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847);
3291 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3294 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
3297 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
3300 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
3303 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
3310 /* Claim to be the following CPU (4 x ...), which is AVX and cx16
3311 capable. Plus (kludge!) it "supports" HTM.
3313 Also with the following change: claim that XSaveOpt is not
3314 available, by cpuid(eax=0xD,ecx=1).eax[0] returns 0, compared to 1
3315 on the real CPU. Consequently, programs that correctly observe
3316 these CPUID values should only try to use 3 of the 8 XSave-family
3317 instructions: XGETBV, XSAVE and XRSTOR. In particular this avoids
3318 having to implement the compacted or optimised save/restore
3321 vendor_id : GenuineIntel
3324 model name : Intel(R) Core(TM) i5-2300 CPU @ 2.80GHz
3327 cache size : 6144 KB
3338 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
3339 mtrr pge mca cmov pat pse36 clflush dts acpi
3340 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
3341 lm constant_tsc arch_perfmon pebs bts rep_good
3342 nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq
3343 dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16
3344 xtpr pdcm sse4_1 sse4_2 popcnt aes xsave avx
3345 lahf_lm ida arat epb xsaveopt pln pts dts
3346 tpr_shadow vnmi flexpriority ept vpid
3350 cache_alignment : 64
3351 address sizes : 36 bits physical, 48 bits virtual
3354 void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State
* st
,
3355 ULong hasF16C
, ULong hasRDRAND
,
3358 vassert((hasF16C
>> 1) == 0ULL);
3359 vassert((hasRDRAND
>> 1) == 0ULL);
3360 # define SET_ABCD(_a,_b,_c,_d) \
3361 do { st->guest_RAX = (ULong)(_a); \
3362 st->guest_RBX = (ULong)(_b); \
3363 st->guest_RCX = (ULong)(_c); \
3364 st->guest_RDX = (ULong)(_d); \
3367 UInt old_eax
= (UInt
)st
->guest_RAX
;
3368 UInt old_ecx
= (UInt
)st
->guest_RCX
;
3372 SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
3375 // As a baseline, advertise neither F16C (ecx:29) nor RDRAND (ecx:30),
3376 // but patch in support for them as directed by the caller.
3378 = (hasF16C
? (1U << 29) : 0) | (hasRDRAND
? (1U << 30) : 0);
3379 SET_ABCD(0x000206a7, 0x00100800, (0x1f9ae3bf | ecx_extra
), 0xbfebfbff);
3383 SET_ABCD(0x76035a01, 0x00f0b0ff, 0x00000000, 0x00ca0000);
3386 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3390 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
3391 0x0000003f, 0x00000000); break;
3392 case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f,
3393 0x0000003f, 0x00000000); break;
3394 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
3395 0x000001ff, 0x00000000); break;
3396 case 0x00000003: SET_ABCD(0x1c03c163, 0x02c0003f,
3397 0x00001fff, 0x00000006); break;
3398 default: SET_ABCD(0x00000000, 0x00000000,
3399 0x00000000, 0x00000000); break;
3403 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
3406 SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000);
3411 ebx_extra
= hasRDSEED
? (1U << 18) : 0;
3412 SET_ABCD(0x00000000, 0x00000800 | ebx_extra
, 0x00000000,
3417 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3420 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3423 SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603);
3428 SET_ABCD(0x00000001, 0x00000001,
3429 0x00000100, 0x00000000); break;
3431 SET_ABCD(0x00000004, 0x00000004,
3432 0x00000201, 0x00000000); break;
3434 SET_ABCD(0x00000000, 0x00000000,
3435 old_ecx
, 0x00000000); break;
3439 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3443 case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
3444 0x00000340, 0x00000000); break;
3445 case 0x00000001: SET_ABCD(0x00000000, 0x00000000,
3446 0x00000000, 0x00000000); break;
3447 case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
3448 0x00000000, 0x00000000); break;
3449 default: SET_ABCD(0x00000000, 0x00000000,
3450 0x00000000, 0x00000000); break;
3454 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3457 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3460 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3463 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
3466 SET_ABCD(0x20202020, 0x20202020, 0x65746e49, 0x2952286c);
3469 SET_ABCD(0x726f4320, 0x4d542865, 0x35692029, 0x3033322d);
3472 SET_ABCD(0x50432030, 0x20402055, 0x30382e32, 0x007a4847);
3475 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3478 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
3481 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
3484 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
3487 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3494 /* Claim to be the following CPU (4 x ...), which is AVX2 capable.
3496 With the following change: claim that XSaveOpt is not available, by
3497 cpuid(eax=0xD,ecx=1).eax[0] returns 0, compared to 1 on the real
3498 CPU. Consequently, programs that correctly observe these CPUID
3499 values should only try to use 3 of the 8 XSave-family instructions:
3500 XGETBV, XSAVE and XRSTOR. In particular this avoids having to
3501 implement the compacted or optimised save/restore variants.
3503 vendor_id : GenuineIntel
3506 model name : Intel(R) Core(TM) i7-4910MQ CPU @ 2.90GHz
3510 cache size : 8192 KB
3521 flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca
3522 cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht
3523 tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc
3524 arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc
3525 aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl
3526 vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1
3527 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave
3528 avx f16c rdrand lahf_lm abm ida arat epb pln pts dtherm
3529 tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust
3530 bmi1 avx2 smep bmi2 erms invpcid xsaveopt
3534 cache_alignment : 64
3535 address sizes : 39 bits physical, 48 bits virtual
3538 void amd64g_dirtyhelper_CPUID_avx2 ( VexGuestAMD64State
* st
,
3539 ULong hasF16C
, ULong hasRDRAND
,
3542 vassert((hasF16C
>> 1) == 0ULL);
3543 vassert((hasRDRAND
>> 1) == 0ULL);
3544 # define SET_ABCD(_a,_b,_c,_d) \
3545 do { st->guest_RAX = (ULong)(_a); \
3546 st->guest_RBX = (ULong)(_b); \
3547 st->guest_RCX = (ULong)(_c); \
3548 st->guest_RDX = (ULong)(_d); \
3551 UInt old_eax
= (UInt
)st
->guest_RAX
;
3552 UInt old_ecx
= (UInt
)st
->guest_RCX
;
3556 SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
3559 // As a baseline, advertise neither F16C (ecx:29) nor RDRAND (ecx:30),
3560 // but patch in support for them as directed by the caller.
3562 = (hasF16C
? (1U << 29) : 0) | (hasRDRAND
? (1U << 30) : 0);
3563 SET_ABCD(0x000306c3, 0x02100800, (0x1ffafbff | ecx_extra
), 0xbfebfbff);
3567 SET_ABCD(0x76036301, 0x00f0b6ff, 0x00000000, 0x00c10000);
3570 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3574 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
3575 0x0000003f, 0x00000000); break;
3576 case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f,
3577 0x0000003f, 0x00000000); break;
3578 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
3579 0x000001ff, 0x00000000); break;
3580 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
3581 0x00001fff, 0x00000006); break;
3582 default: SET_ABCD(0x00000000, 0x00000000,
3583 0x00000000, 0x00000000); break;
3587 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00042120);
3590 SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000);
3594 /* Don't advertise FSGSBASE support, bit 0 in EBX. */
3597 UInt ebx_extra
= hasRDSEED
? (1U << 18) : 0;
3598 SET_ABCD(0x00000000, 0x000027aa | ebx_extra
,
3599 0x00000000, 0x00000000); break;
3601 default: SET_ABCD(0x00000000, 0x00000000,
3602 0x00000000, 0x00000000); break;
3606 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3609 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3612 SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603);
3616 case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
3617 0x00000100, 0x00000002); break;
3618 case 0x00000001: SET_ABCD(0x00000004, 0x00000008,
3619 0x00000201, 0x00000002); break;
3620 default: SET_ABCD(0x00000000, 0x00000000,
3621 old_ecx
, 0x00000002); break;
3625 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3629 case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
3630 0x00000340, 0x00000000); break;
3631 case 0x00000001: SET_ABCD(0x00000000, 0x00000000,
3632 0x00000000, 0x00000000); break;
3633 case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
3634 0x00000000, 0x00000000); break;
3635 default: SET_ABCD(0x00000000, 0x00000000,
3636 0x00000000, 0x00000000); break;
3640 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3643 SET_ABCD(0x00000000, 0x00000000, 0x00000021, 0x2c100800);
3646 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
3649 SET_ABCD(0x37692029, 0x3139342d, 0x20514d30, 0x20555043);
3652 SET_ABCD(0x2e322040, 0x48473039, 0x0000007a, 0x00000000);
3655 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3658 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
3661 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
3664 SET_ABCD(0x00003027, 0x00000000, 0x00000000, 0x00000000);
3667 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
/*---------------------------------------------------------------*/
/*--- Misc integer helpers, including rotates and crypto.     ---*/
/*---------------------------------------------------------------*/
ULong amd64g_calculate_RCR ( ULong arg,
                             ULong rot_amt,
                             ULong rflags_in,
                             Long  szIN )
{
   Bool  wantRflags = toBool(szIN < 0);
   ULong sz         = wantRflags ? (-szIN) : szIN;
   ULong tempCOUNT  = rot_amt & (sz == 8 ? 0x3F : 0x1F);
   ULong cf=0, of=0, tempcf;

   switch (sz) {
      case 8:
         cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
         of = ((arg >> 63) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = (arg >> 1) | (cf << 63);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 4:
         cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
         of = ((arg >> 31) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 2:
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
         of = ((arg >> 15) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7FFFULL) | (cf << 15);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 1:
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
         of = ((arg >> 7) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7FULL) | (cf << 7);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      default:
         vpanic("calculate_RCR(amd64g): invalid size");
   }

   cf &= 1;
   of &= 1;
   rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
   rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);

   /* caller can ask to have back either the resulting flags or
      resulting value, but not both */
   return wantRflags ? rflags_in : arg;
}
ULong amd64g_calculate_RCL ( ULong arg,
                             ULong rot_amt,
                             ULong rflags_in,
                             Long  szIN )
{
   Bool  wantRflags = toBool(szIN < 0);
   ULong sz         = wantRflags ? (-szIN) : szIN;
   ULong tempCOUNT  = rot_amt & (sz == 8 ? 0x3F : 0x1F);
   ULong cf=0, of=0, tempcf;

   switch (sz) {
      case 8:
         cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 63) & 1;
            arg    = (arg << 1) | (cf & 1);
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 63) ^ cf) & 1;
         break;
      case 4:
         cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 31) & 1;
            arg    = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 31) ^ cf) & 1;
         break;
      case 2:
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 15) & 1;
            arg    = 0xFFFFULL & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 15) ^ cf) & 1;
         break;
      case 1:
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 7) & 1;
            arg    = 0xFFULL & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 7) ^ cf) & 1;
         break;
      default:
         vpanic("calculate_RCL(amd64g): invalid size");
   }

   cf &= 1;
   of &= 1;
   rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
   rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);

   return wantRflags ? rflags_in : arg;
}
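
/* Both RCR and RCL above rotate one bit per iteration through a separate
   carry bit and only afterwards fold CF/OF back into the rflags word.
   A standalone sketch of a single 8-bit RCL step, operating on a
   (value, carry) pair; the function is invented for the example and is
   not used by VEX. */
static inline void example_rcl8_one_step ( UChar* val, UInt* carry )
{
   UInt newCarry = (*val >> 7) & 1;                          /* bit that falls off the top */
   *val   = (UChar)(((*val << 1) | (*carry & 1)) & 0xFF);    /* old carry shifts in at the bottom */
   *carry = newCarry;
}
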
/* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+)
 * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25
 */
ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which)
{
   ULong hi, lo, tmp, A[16];

   A[0] = 0;            A[1] = a;
   A[2] = A[1] << 1;    A[3] = A[2] ^ a;
   A[4] = A[2] << 1;    A[5] = A[4] ^ a;
   A[6] = A[3] << 1;    A[7] = A[6] ^ a;
   A[8] = A[4] << 1;    A[9] = A[8] ^ a;
   A[10] = A[5] << 1;   A[11] = A[10] ^ a;
   A[12] = A[6] << 1;   A[13] = A[12] ^ a;
   A[14] = A[7] << 1;   A[15] = A[14] ^ a;

   lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15];
   hi = lo >> 56;
   lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15];
   hi = (hi << 8) | (lo >> 56);
   lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15];
   hi = (hi << 8) | (lo >> 56);
   lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15];
   hi = (hi << 8) | (lo >> 56);
   lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15];
   hi = (hi << 8) | (lo >> 56);
   lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15];
   hi = (hi << 8) | (lo >> 56);
   lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15];
   hi = (hi << 8) | (lo >> 56);
   lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15];

   ULong m0 = -1;
   m0 /= 255;
   tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp;
   tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp;
   tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp;
   tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp;
   tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp;
   tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp;
   tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp;

   return which ? hi : lo;
}
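
/* The table-driven routine above computes the 64x64 -> 128 carry-less
   (GF(2)[x]) product four bits of 'b' at a time, plus a fix-up for the
   high half.  For reference, here is the same product computed one bit
   at a time; it is much slower but makes the definition obvious.  This
   function is invented for the example and is not used by VEX. */
static inline ULong example_clmul_ref ( ULong a, ULong b, ULong which )
{
   ULong hi = 0, lo = 0;
   UInt  i;
   for (i = 0; i < 64; i++) {
      if ((b >> i) & 1) {
         /* xor in (a << i), viewed as a 128-bit quantity */
         lo ^= a << i;
         if (i != 0)
            hi ^= a >> (64 - i);
      }
   }
   return which ? hi : lo;
}
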
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-amd64 platforms, return 1. */
ULong amd64g_dirtyhelper_RDTSC ( void )
{
#  if defined(__x86_64__)
   UInt eax, edx;
   __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx));
   return (((ULong)edx) << 32) | ((ULong)eax);
#  else
   return 1ULL;
#  endif
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-amd64 platforms, do nothing. */
/* This uses a different calling convention from _RDTSC just above
   only because of the difficulty of returning 96 bits from a C
   function -- RDTSC returns 64 bits and so is simple by comparison,
   on amd64. */
void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st )
{
#  if defined(__x86_64__)
   UInt eax, ecx, edx;
   __asm__ __volatile__("rdtscp" : "=a" (eax), "=d" (edx), "=c" (ecx));
   st->guest_RAX = (ULong)eax;
   st->guest_RCX = (ULong)ecx;
   st->guest_RDX = (ULong)edx;
#  endif
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-amd64 platforms, return 0. */
ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ )
{
#  if defined(__x86_64__)
   ULong r = 0;
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0"
                              : "=a" (r) : "Nd" (portno));
         break;
      default:
         break; /* note: no 64-bit version of insn exists */
   }
   return r;
#  else
   return 0;
#  endif
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-amd64 platforms, do nothing. */
void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ )
{
#  if defined(__x86_64__)
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("outw %w0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("outb %b0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      default:
         break; /* note: no 64-bit version of insn exists */
   }
#  else
   /* do nothing */
#  endif
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-amd64 platforms, do nothing. */
/* op = 0: call the native SGDT instruction.
   op = 1: call the native SIDT instruction.
*/
void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) {
#  if defined(__x86_64__)
   switch (op) {
      case 0:
         __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
         break;
      case 1:
         __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
         break;
      default:
         vpanic("amd64g_dirtyhelper_SxDT");
   }
#  else
   /* do nothing */
   UChar* p = (UChar*)address;
   p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
   p[6] = p[7] = p[8] = p[9] = 0;
#  endif
}
3975 /* CALLED FROM GENERATED CODE */
3976 /* DIRTY HELPER (non-referentially-transparent) */
3977 /* Horrible hack. On non-amd64 platforms, do nothing. On amd64 targets, get a
3978 32 bit random number using RDRAND, and return it and the associated rflags.C
3980 ULong
amd64g_dirtyhelper_RDRAND ( void ) {
3981 # if defined(__x86_64__)
3984 __asm__
__volatile__(
3991 : "=r"(res
), "=r"(cflag
) : : "r11", "r12"
3993 res
&= 0xFFFFFFFFULL
;
3995 return (cflag
<< 32) | res
;
3997 /* There's nothing we can sensibly do. Return a value denoting
3998 "I succeeded, and the random bits are all zero" :-/ */
4003 ULong
amd64g_dirtyhelper_RDSEED ( void ) {
4004 # if defined(__x86_64__)
4007 __asm__
__volatile__(
4014 : "=r"(res
), "=r"(cflag
) : : "r11", "r12"
4016 res
&= 0xFFFFFFFFULL
;
4018 return (cflag
<< 32) | res
;
4020 /* There's nothing we can sensibly do. Return a value denoting
4021 "I succeeded, and the random bits are all zero" :-/ */
/*---------------------------------------------------------------*/
/*--- Helpers for MMX/SSE/SSE2.                               ---*/
/*---------------------------------------------------------------*/

static inline UChar abdU8 ( UChar xx, UChar yy ) {
   return toUChar(xx>yy ? xx-yy : yy-xx);
}

static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
   return (((ULong)w1) << 32) | ((ULong)w0);
}

static inline UShort sel16x4_3 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(hi32 >> 16);
}
static inline UShort sel16x4_2 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(hi32);
}
static inline UShort sel16x4_1 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUShort(lo32 >> 16);
}
static inline UShort sel16x4_0 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUShort(lo32);
}

static inline UChar sel8x8_7 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 24);
}
static inline UChar sel8x8_6 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 16);
}
static inline UChar sel8x8_5 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 8);
}
static inline UChar sel8x8_4 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 0);
}
static inline UChar sel8x8_3 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 24);
}
static inline UChar sel8x8_2 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 16);
}
static inline UChar sel8x8_1 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 8);
}
static inline UChar sel8x8_0 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 0);
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
{
   return
      mk32x2(
         (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
            + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
         (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
            + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
      );
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
{
   UInt t = 0;
   t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
   t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
   t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
   t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
   t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
   t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
   t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
   t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
   t &= 0xFFFF;
   return (ULong)t;
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi )
{
   UShort t, min;
   UInt   idx;
   t = sel16x4_0(sLo); if (True)    { min = t; idx = 0; }
   t = sel16x4_1(sLo); if (t < min) { min = t; idx = 1; }
   t = sel16x4_2(sLo); if (t < min) { min = t; idx = 2; }
   t = sel16x4_3(sLo); if (t < min) { min = t; idx = 3; }
   t = sel16x4_0(sHi); if (t < min) { min = t; idx = 4; }
   t = sel16x4_1(sHi); if (t < min) { min = t; idx = 5; }
   t = sel16x4_2(sHi); if (t < min) { min = t; idx = 6; }
   t = sel16x4_3(sHi); if (t < min) { min = t; idx = 7; }
   return ((ULong)(idx << 16)) | ((ULong)min);
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calc_crc32b ( ULong crcIn, ULong b )
{
   UInt  i;
   ULong crc = (b & 0xFFULL) ^ crcIn;
   for (i = 0; i < 8; i++)
      crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
   return crc;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calc_crc32w ( ULong crcIn, ULong w )
{
   UInt  i;
   ULong crc = (w & 0xFFFFULL) ^ crcIn;
   for (i = 0; i < 16; i++)
      crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
   return crc;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calc_crc32l ( ULong crcIn, ULong l )
{
   UInt  i;
   ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn;
   for (i = 0; i < 32; i++)
      crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
   return crc;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calc_crc32q ( ULong crcIn, ULong q )
{
   ULong crc = amd64g_calc_crc32l(crcIn, q);
   return amd64g_calc_crc32l(crc, q >> 32);
}
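
/* All four helpers above iterate the same bit-reflected CRC32-C
   polynomial, 0x82F63B78; the wider variants just feed more input bits
   per call.  A sketch that CRCs an arbitrary buffer by chaining the
   byte-sized helper, the way the guest's accumulator register is threaded
   from one CRC32 instruction to the next.  The init/final-xor values are
   the customary CRC32-C convention; the function itself is invented for
   the example and is not used by VEX. */
static inline ULong example_crc32c_buffer ( const UChar* buf, UInt len )
{
   UInt  i;
   ULong crc = 0xFFFFFFFFULL;          /* customary initial value */
   for (i = 0; i < len; i++)
      crc = amd64g_calc_crc32b( crc, (ULong)buf[i] );
   return crc ^ 0xFFFFFFFFULL;         /* customary final xor */
}
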
/* .. helper for next fn .. */
static inline ULong sad_8x4 ( ULong xx, ULong yy )
{
   UInt t = 0;
   t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
   t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
   t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
   t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
   return (ULong)t;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calc_mpsadbw ( ULong sHi, ULong sLo,
                            ULong dHi, ULong dLo,
                            ULong imm_and_return_control_bit )
{
   UInt  imm8     = imm_and_return_control_bit & 7;
   Bool  calcHi   = (imm_and_return_control_bit >> 7) & 1;
   UInt  srcOffsL = imm8 & 3; /* src offs in 32-bit (L) chunks */
   UInt  dstOffsL = (imm8 >> 2) & 1; /* dst offs in ditto chunks */
   /* For src we only need 32 bits, so get them into the
      lower half of a 64 bit word. */
   ULong src = ((srcOffsL & 2) ? sHi : sLo) >> (32 * (srcOffsL & 1));
   /* For dst we need to get hold of 56 bits (7 bytes) from a total of
      11 bytes.  If calculating the low part of the result, need bytes
      dstOffsL * 4 + (0 .. 6); if calculating the high part,
      dstOffsL * 4 + (4 .. 10). */
   ULong dst;
   /* dstOffL = 0, Lo  ->  0 .. 6
      dstOffL = 1, Lo  ->  4 .. 10
      dstOffL = 0, Hi  ->  4 .. 10
      dstOffL = 1, Hi  ->  8 .. 14
   */
   if (calcHi && dstOffsL) {
      /* 8 .. 14 */
      dst = dHi & 0x00FFFFFFFFFFFFFFULL;
   }
   else if (!calcHi && !dstOffsL) {
      /* 0 .. 6 */
      dst = dLo & 0x00FFFFFFFFFFFFFFULL;
   }
   else {
      /* 4 .. 10 */
      dst = (dLo >> 32) | ((dHi & 0x00FFFFFFULL) << 32);
   }
   ULong r0  = sad_8x4( dst >>  0, src );
   ULong r1  = sad_8x4( dst >>  8, src );
   ULong r2  = sad_8x4( dst >> 16, src );
   ULong r3  = sad_8x4( dst >> 24, src );
   ULong res = (r3 << 48) | (r2 << 32) | (r1 << 16) | r0;
   return res;
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_pext ( ULong src_masked, ULong mask )
{
   ULong dst = 0;
   ULong src_bit;
   ULong dst_bit = 1;
   for (src_bit = 1; src_bit; src_bit <<= 1) {
      if (mask & src_bit) {
         if (src_masked & src_bit) dst |= dst_bit;
         dst_bit <<= 1;
      }
   }
   return dst;
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_pdep ( ULong src, ULong mask )
{
   ULong dst = 0;
   ULong dst_bit;
   ULong src_bit = 1;
   for (dst_bit = 1; dst_bit; dst_bit <<= 1) {
      if (mask & dst_bit) {
         if (src & src_bit) dst |= dst_bit;
         src_bit <<= 1;
      }
   }
   return dst;
}
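
/* Illustration only, not part of VEX: a quick sanity property tying
   the two helpers together.  PDEP scatters the low-order source bits
   into the positions selected by the mask, and PEXT gathers them back,
   so extracting and then re-depositing reproduces exactly the masked
   bits.  The function name is invented for this example. */
#if 0
static void example_pdep_pext_roundtrip ( ULong x, ULong mask )
{
   vassert(amd64g_calculate_pdep(amd64g_calculate_pext(x & mask, mask),
                                 mask)
           == (x & mask));
}
#endif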

/*---------------------------------------------------------------*/
/*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M}                    ---*/
/*---------------------------------------------------------------*/

static UInt zmask_from_V128 ( V128* arg )
{
   UInt i, res = 0;
   for (i = 0; i < 16; i++) {
      res |= ((arg->w8[i] == 0) ? 1 : 0) << i;
   }
   return res;
}

static UInt zmask_from_V128_wide ( V128* arg )
{
   UInt i, res = 0;
   for (i = 0; i < 8; i++) {
      res |= ((arg->w16[i] == 0) ? 1 : 0) << i;
   }
   return res;
}
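
/* The masks built above have bit i set iff element i of the vector is
   zero.  For the implicit-length PCMPISTRx variants this is how the
   string terminators are located; for the explicit-length PCMPESTRx
   variants the masks are instead synthesised from the edx/eax lengths
   inside the dirty helper below. */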

/* Helps with PCMP{I,E}STR{I,M}.

   CALLED FROM GENERATED CODE: DIRTY HELPER(s).  (But not really,
   actually it could be a clean helper, but for the fact that we can't
   pass by value 2 x V128 to a clean helper, nor have one returned.)
   Reads guest state, writes to guest state for the xSTRM cases, no
   accesses of memory, is a pure function.

   opc_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so
   the callee knows which I/E and I/M variant it is dealing with and
   what the specific operation is.  4th byte of opcode is in the range
   0x60 to 0x63:
       istri  66 0F 3A 63
       istrm  66 0F 3A 62
       estri  66 0F 3A 61
       estrm  66 0F 3A 60

   gstOffL and gstOffR are the guest state offsets for the two XMM
   register inputs.  We never have to deal with the memory case since
   that is handled by pre-loading the relevant value into the fake
   XMM16 register.

   For ESTRx variants, edxIN and eaxIN hold the values of those two
   registers.

   In all cases, the bottom 16 bits of the result contain the new
   OSZACP %rflags values.  For xSTRI variants, bits[31:16] of the
   result hold the new %ecx value.  For xSTRM variants, the helper
   writes the result directly to the guest XMM0.

   Declarable side effects: in all cases, reads guest state at
   [gstOffL, +16) and [gstOffR, +16).  For xSTRM variants, also writes
   the guest XMM0.

   Is expected to be called with opc_and_imm combinations which have
   actually been validated, and will assert if otherwise.  The front
   end should ensure we're only called with verified values.
*/
ULong amd64g_dirtyhelper_PCMPxSTRx (
          VexGuestAMD64State* gst,
          HWord opc4_and_imm,
          HWord gstOffL, HWord gstOffR,
          HWord edxIN, HWord eaxIN
       )
{
   HWord opc4    = (opc4_and_imm >> 8) & 0xFF;
   HWord imm8    = opc4_and_imm & 0xFF;
   HWord isISTRx = opc4 & 2;
   HWord isxSTRM = (opc4 & 1) ^ 1;
   vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */
   HWord wide    = (imm8 & 1);

   // where the args are
   V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
   V128* argR = (V128*)( ((UChar*)gst) + gstOffR );

   /* Create the arg validity masks, either from the vectors
      themselves or from the supplied edx/eax values. */
   // FIXME: this is only right for the 8-bit data cases.
   // At least that is asserted above.
   UInt zmaskL, zmaskR;

   // temp spot for the resulting flags and vector.
   V128 resV;
   UInt resOSZACP;

   // for checking whether case was handled
   Bool ok = False;

   if (wide) {
      if (isISTRx) {
         zmaskL = zmask_from_V128_wide(argL);
         zmaskR = zmask_from_V128_wide(argR);
      } else {
         Int tmp;
         tmp = edxIN & 0xFFFFFFFF;
         if (tmp < -8) tmp = -8;
         if (tmp > 8)  tmp = 8;
         if (tmp < 0)  tmp = -tmp;
         vassert(tmp >= 0 && tmp <= 8);
         zmaskL = (1 << tmp) & 0xFF;
         tmp = eaxIN & 0xFFFFFFFF;
         if (tmp < -8) tmp = -8;
         if (tmp > 8)  tmp = 8;
         if (tmp < 0)  tmp = -tmp;
         vassert(tmp >= 0 && tmp <= 8);
         zmaskR = (1 << tmp) & 0xFF;
      }
      ok = compute_PCMPxSTRx_wide (
              &resV, &resOSZACP, argL, argR,
              zmaskL, zmaskR, imm8, (Bool)isxSTRM
           );
   } else {
      if (isISTRx) {
         zmaskL = zmask_from_V128(argL);
         zmaskR = zmask_from_V128(argR);
      } else {
         Int tmp;
         tmp = edxIN & 0xFFFFFFFF;
         if (tmp < -16) tmp = -16;
         if (tmp > 16)  tmp = 16;
         if (tmp < 0)   tmp = -tmp;
         vassert(tmp >= 0 && tmp <= 16);
         zmaskL = (1 << tmp) & 0xFFFF;
         tmp = eaxIN & 0xFFFFFFFF;
         if (tmp < -16) tmp = -16;
         if (tmp > 16)  tmp = 16;
         if (tmp < 0)   tmp = -tmp;
         vassert(tmp >= 0 && tmp <= 16);
         zmaskR = (1 << tmp) & 0xFFFF;
      }
      ok = compute_PCMPxSTRx (
              &resV, &resOSZACP, argL, argR,
              zmaskL, zmaskR, imm8, (Bool)isxSTRM
           );
   }

   // front end shouldn't pass us any imm8 variants we can't
   // handle.  Hence:
   vassert(ok);

   // So, finally we need to get the results back to the caller.
   // In all cases, the new OSZACP value is the lowest 16 of
   // the return value.
   if (isxSTRM) {
      gst->guest_YMM0[0] = resV.w32[0];
      gst->guest_YMM0[1] = resV.w32[1];
      gst->guest_YMM0[2] = resV.w32[2];
      gst->guest_YMM0[3] = resV.w32[3];
      return resOSZACP & 0x8D5;
   } else {
      UInt newECX = resV.w32[0] & 0xFFFF;
      return (newECX << 16) | (resOSZACP & 0x8D5);
   }
}
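
/* Illustration only, not part of VEX: unpacking the value returned by
   the helper above for an xSTRI variant.  Bits 15:0 hold the new
   OSZACP flags in their native %rflags positions (hence the 0x8D5
   mask), and bits 31:16 hold the new %ecx value.  The function name
   and out-parameters are invented for this example. */
#if 0
static void example_unpack_xstri_result ( ULong retval,
                                           /*OUT*/UInt* newECX,
                                           /*OUT*/UInt* newOSZACP )
{
   *newOSZACP = (UInt)(retval & 0x8D5);          /* O S Z A C P bits */
   *newECX    = (UInt)((retval >> 16) & 0xFFFF); /* new %ecx value */
}
#endif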

/*---------------------------------------------------------------*/
/*--- AES primitives and helpers                              ---*/
/*---------------------------------------------------------------*/

/* a 16 x 16 matrix */
static const UChar sbox[256] = {                   // row nr
   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, // 1
   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, // 2
   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, // 3
   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, // 4
   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, // 5
   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, // 6
   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, // 7
   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, // 8
   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, // 9
   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, //10
   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, //11
   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, //12
   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, //13
   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, //14
   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, //15
   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, //16
   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
};
static void SubBytes (V128* v)
{
   V128 r;
   UInt i;
   for (i = 0; i < 16; i++)
      r.w8[i] = sbox[v->w8[i]];
   *v = r;
}

/* a 16 x 16 matrix */
static const UChar invsbox[256] = {                // row nr
   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, // 1
   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, // 2
   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, // 3
   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, // 4
   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, // 5
   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, // 6
   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, // 7
   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, // 8
   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, // 9
   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, //10
   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, //11
   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, //12
   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, //13
   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, //14
   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, //15
   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, //16
   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
};
static void InvSubBytes (V128* v)
{
   V128 r;
   UInt i;
   for (i = 0; i < 16; i++)
      r.w8[i] = invsbox[v->w8[i]];
   *v = r;
}

static const UChar ShiftRows_op[16] =
   {11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0};
static void ShiftRows (V128* v)
{
   V128 r;
   UInt i;
   for (i = 0; i < 16; i++)
      r.w8[i] = v->w8[ShiftRows_op[15-i]];
   *v = r;
}

static const UChar InvShiftRows_op[16] =
   {3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0};
static void InvShiftRows (V128* v)
{
   V128 r;
   UInt i;
   for (i = 0; i < 16; i++)
      r.w8[i] = v->w8[InvShiftRows_op[15-i]];
   *v = r;
}

/* Multiplication of the finite fields elements of AES.
   See "A Specification for The AES Algorithm Rijndael
   (by Joan Daemen & Vincent Rijmen)"
   Dr. Brian Gladman, v3.1, 3rd March 2001. */
/* N values so that (hex) xy = 0x03^N.
   0x00 cannot be used. We put 0xff for this value. */
/* a 16 x 16 matrix */
static const UChar Nxy[256] = {                    // row nr
   0xff, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, // 1
   0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
   0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, // 2
   0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
   0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, // 3
   0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
   0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, // 4
   0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
   0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, // 5
   0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
   0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, // 6
   0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
   0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, // 7
   0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
   0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, // 8
   0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
   0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, // 9
   0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
   0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, //10
   0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
   0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, //11
   0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
   0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, //12
   0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
   0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, //13
   0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
   0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, //14
   0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
   0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, //15
   0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
   0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, //16
   0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07
};

/* E values so that E = 0x03^xy. */
static const UChar Exy[256] = {                    // row nr
   0x01, 0x03, 0x05, 0x0f, 0x11, 0x33, 0x55, 0xff, // 1
   0x1a, 0x2e, 0x72, 0x96, 0xa1, 0xf8, 0x13, 0x35,
   0x5f, 0xe1, 0x38, 0x48, 0xd8, 0x73, 0x95, 0xa4, // 2
   0xf7, 0x02, 0x06, 0x0a, 0x1e, 0x22, 0x66, 0xaa,
   0xe5, 0x34, 0x5c, 0xe4, 0x37, 0x59, 0xeb, 0x26, // 3
   0x6a, 0xbe, 0xd9, 0x70, 0x90, 0xab, 0xe6, 0x31,
   0x53, 0xf5, 0x04, 0x0c, 0x14, 0x3c, 0x44, 0xcc, // 4
   0x4f, 0xd1, 0x68, 0xb8, 0xd3, 0x6e, 0xb2, 0xcd,
   0x4c, 0xd4, 0x67, 0xa9, 0xe0, 0x3b, 0x4d, 0xd7, // 5
   0x62, 0xa6, 0xf1, 0x08, 0x18, 0x28, 0x78, 0x88,
   0x83, 0x9e, 0xb9, 0xd0, 0x6b, 0xbd, 0xdc, 0x7f, // 6
   0x81, 0x98, 0xb3, 0xce, 0x49, 0xdb, 0x76, 0x9a,
   0xb5, 0xc4, 0x57, 0xf9, 0x10, 0x30, 0x50, 0xf0, // 7
   0x0b, 0x1d, 0x27, 0x69, 0xbb, 0xd6, 0x61, 0xa3,
   0xfe, 0x19, 0x2b, 0x7d, 0x87, 0x92, 0xad, 0xec, // 8
   0x2f, 0x71, 0x93, 0xae, 0xe9, 0x20, 0x60, 0xa0,
   0xfb, 0x16, 0x3a, 0x4e, 0xd2, 0x6d, 0xb7, 0xc2, // 9
   0x5d, 0xe7, 0x32, 0x56, 0xfa, 0x15, 0x3f, 0x41,
   0xc3, 0x5e, 0xe2, 0x3d, 0x47, 0xc9, 0x40, 0xc0, //10
   0x5b, 0xed, 0x2c, 0x74, 0x9c, 0xbf, 0xda, 0x75,
   0x9f, 0xba, 0xd5, 0x64, 0xac, 0xef, 0x2a, 0x7e, //11
   0x82, 0x9d, 0xbc, 0xdf, 0x7a, 0x8e, 0x89, 0x80,
   0x9b, 0xb6, 0xc1, 0x58, 0xe8, 0x23, 0x65, 0xaf, //12
   0xea, 0x25, 0x6f, 0xb1, 0xc8, 0x43, 0xc5, 0x54,
   0xfc, 0x1f, 0x21, 0x63, 0xa5, 0xf4, 0x07, 0x09, //13
   0x1b, 0x2d, 0x77, 0x99, 0xb0, 0xcb, 0x46, 0xca,
   0x45, 0xcf, 0x4a, 0xde, 0x79, 0x8b, 0x86, 0x91, //14
   0xa8, 0xe3, 0x3e, 0x42, 0xc6, 0x51, 0xf3, 0x0e,
   0x12, 0x36, 0x5a, 0xee, 0x29, 0x7b, 0x8d, 0x8c, //15
   0x8f, 0x8a, 0x85, 0x94, 0xa7, 0xf2, 0x0d, 0x17,
   0x39, 0x4b, 0xdd, 0x7c, 0x84, 0x97, 0xa2, 0xfd, //16
   0x1c, 0x24, 0x6c, 0xb4, 0xc7, 0x52, 0xf6, 0x01 };

static inline UChar ff_mul ( UChar u1, UChar u2 )
{
   if ((u1 > 0) && (u2 > 0)) {
      UInt ui = Nxy[u1] + Nxy[u2];
      if (ui >= 255)
         ui = ui - 255;
      return Exy[ui];
   }
   else {
      return 0;
   }
}
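
/* Worked example of the table-based multiply above:
   ff_mul(0x02, 0x03) = Exy[Nxy[0x02] + Nxy[0x03]]
                      = Exy[0x19 + 0x01] = Exy[0x1a] = 0x06,
   which is indeed 0x02 * 0x03 in the AES field GF(2^8). */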

static void MixColumns (V128* v)
{
   V128 r;
   Int j;
#define P(x,row,col) (x)->w8[((row)*4+(col))]
   for (j = 0; j < 4; j++) {
      P(&r,j,0) = ff_mul(0x02, P(v,j,0)) ^ ff_mul(0x03, P(v,j,1))
                  ^ P(v,j,2) ^ P(v,j,3);
      P(&r,j,1) = P(v,j,0) ^ ff_mul( 0x02, P(v,j,1) )
                  ^ ff_mul(0x03, P(v,j,2) ) ^ P(v,j,3);
      P(&r,j,2) = P(v,j,0) ^ P(v,j,1) ^ ff_mul( 0x02, P(v,j,2) )
                  ^ ff_mul(0x03, P(v,j,3) );
      P(&r,j,3) = ff_mul(0x03, P(v,j,0) ) ^ P(v,j,1) ^ P(v,j,2)
                  ^ ff_mul( 0x02, P(v,j,3) );
   }
   *v = r;
#undef P
}

static void InvMixColumns (V128* v)
{
   V128 r;
   Int j;
#define P(x,row,col) (x)->w8[((row)*4+(col))]
   for (j = 0; j < 4; j++) {
      P(&r,j,0) = ff_mul(0x0e, P(v,j,0) ) ^ ff_mul(0x0b, P(v,j,1) )
                  ^ ff_mul(0x0d, P(v,j,2) ) ^ ff_mul(0x09, P(v,j,3) );
      P(&r,j,1) = ff_mul(0x09, P(v,j,0) ) ^ ff_mul(0x0e, P(v,j,1) )
                  ^ ff_mul(0x0b, P(v,j,2) ) ^ ff_mul(0x0d, P(v,j,3) );
      P(&r,j,2) = ff_mul(0x0d, P(v,j,0) ) ^ ff_mul(0x09, P(v,j,1) )
                  ^ ff_mul(0x0e, P(v,j,2) ) ^ ff_mul(0x0b, P(v,j,3) );
      P(&r,j,3) = ff_mul(0x0b, P(v,j,0) ) ^ ff_mul(0x0d, P(v,j,1) )
                  ^ ff_mul(0x09, P(v,j,2) ) ^ ff_mul(0x0e, P(v,j,3) );
   }
   *v = r;
#undef P
}

/* For description, see definition in guest_amd64_defs.h */
void amd64g_dirtyhelper_AES (
          VexGuestAMD64State* gst,
          HWord opc4, HWord gstOffD,
          HWord gstOffL, HWord gstOffR
       )
{
   // where the args are
   V128* argD = (V128*)( ((UChar*)gst) + gstOffD );
   V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
   V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
   V128  r;

   switch (opc4) {
      case 0xDC: /* AESENC */
      case 0xDD: /* AESENCLAST */
         r = *argR;
         ShiftRows (&r);
         SubBytes  (&r);
         if (opc4 == 0xDC)
            MixColumns (&r);
         argD->w64[0] = r.w64[0] ^ argL->w64[0];
         argD->w64[1] = r.w64[1] ^ argL->w64[1];
         break;

      case 0xDE: /* AESDEC */
      case 0xDF: /* AESDECLAST */
         r = *argR;
         InvShiftRows (&r);
         InvSubBytes (&r);
         if (opc4 == 0xDE)
            InvMixColumns (&r);
         argD->w64[0] = r.w64[0] ^ argL->w64[0];
         argD->w64[1] = r.w64[1] ^ argL->w64[1];
         break;

      case 0xDB: /* AESIMC */
         *argD = *argL;
         InvMixColumns (argD);
         break;

      default: vassert(0);
   }
}

static inline UInt RotWord (UInt w32)
{
   return ((w32 >> 8) | (w32 << 24));
}

static inline UInt SubWord (UInt w32)
{
   UChar *w8;
   UChar *r8;
   UInt   res;
   w8 = (UChar*) &w32;
   r8 = (UChar*) &res;
   r8[0] = sbox[w8[0]];
   r8[1] = sbox[w8[1]];
   r8[2] = sbox[w8[2]];
   r8[3] = sbox[w8[3]];
   return res;
}
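
/* Worked example: RotWord(0xAABBCCDD) = 0xDDAABBCC, a 32-bit rotate
   right by 8, which on a little-endian byte layout is the AES key
   schedule's one-byte left rotate of the word's bytes.  SubWord simply
   pushes each of the four bytes through the sbox defined earlier. */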

/* For description, see definition in guest_amd64_defs.h */
extern void amd64g_dirtyhelper_AESKEYGENASSIST (
          VexGuestAMD64State* gst,
          HWord imm8,
          HWord gstOffL, HWord gstOffR
       )
{
   // where the args are
   V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
   V128* argR = (V128*)( ((UChar*)gst) + gstOffR );

   // We have to create the result in a temporary in the
   // case where the src and dst regs are the same.  See #341698.
   V128 tmp;

   tmp.w32[3] = RotWord (SubWord (argL->w32[3])) ^ imm8;
   tmp.w32[2] = SubWord (argL->w32[3]);
   tmp.w32[1] = RotWord (SubWord (argL->w32[1])) ^ imm8;
   tmp.w32[0] = SubWord (argL->w32[1]);

   argR->w32[3] = tmp.w32[3];
   argR->w32[2] = tmp.w32[2];
   argR->w32[1] = tmp.w32[1];
   argR->w32[0] = tmp.w32[0];
}

/*---------------------------------------------------------------*/
/*--- Helpers for dealing with, and describing,               ---*/
/*--- guest state as a whole.                                 ---*/
/*---------------------------------------------------------------*/

/* Initialise the entire amd64 guest state. */
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state )
{
   vex_state->host_EvC_FAILADDR = 0;
   vex_state->host_EvC_COUNTER  = 0;
   vex_state->pad0 = 0;

   vex_state->guest_RAX = 0;
   vex_state->guest_RCX = 0;
   vex_state->guest_RDX = 0;
   vex_state->guest_RBX = 0;
   vex_state->guest_RSP = 0;
   vex_state->guest_RBP = 0;
   vex_state->guest_RSI = 0;
   vex_state->guest_RDI = 0;
   vex_state->guest_R8  = 0;
   vex_state->guest_R9  = 0;
   vex_state->guest_R10 = 0;
   vex_state->guest_R11 = 0;
   vex_state->guest_R12 = 0;
   vex_state->guest_R13 = 0;
   vex_state->guest_R14 = 0;
   vex_state->guest_R15 = 0;

   vex_state->guest_CC_OP   = AMD64G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = 0;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;

   vex_state->guest_DFLAG  = 1; /* forwards */
   vex_state->guest_IDFLAG = 0;
   vex_state->guest_ACFLAG = 0;

   /* HACK: represent the offset associated with a constant %fs.
      Typically, on linux, this assumes that %fs is only ever zero
      (main thread). */
   vex_state->guest_FS_CONST = 0;

   vex_state->guest_RIP = 0;

   /* Initialise the simulated FPU */
   amd64g_dirtyhelper_FINIT( vex_state );

   /* Initialise the AVX state. */
#  define AVXZERO(_ymm) \
      do { _ymm[0]=_ymm[1]=_ymm[2]=_ymm[3] = 0; \
           _ymm[4]=_ymm[5]=_ymm[6]=_ymm[7] = 0; \
      } while (0)
   vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST;
   AVXZERO(vex_state->guest_YMM0);
   AVXZERO(vex_state->guest_YMM1);
   AVXZERO(vex_state->guest_YMM2);
   AVXZERO(vex_state->guest_YMM3);
   AVXZERO(vex_state->guest_YMM4);
   AVXZERO(vex_state->guest_YMM5);
   AVXZERO(vex_state->guest_YMM6);
   AVXZERO(vex_state->guest_YMM7);
   AVXZERO(vex_state->guest_YMM8);
   AVXZERO(vex_state->guest_YMM9);
   AVXZERO(vex_state->guest_YMM10);
   AVXZERO(vex_state->guest_YMM11);
   AVXZERO(vex_state->guest_YMM12);
   AVXZERO(vex_state->guest_YMM13);
   AVXZERO(vex_state->guest_YMM14);
   AVXZERO(vex_state->guest_YMM15);
   AVXZERO(vex_state->guest_YMM16);
#  undef AVXZERO

   vex_state->guest_EMNOTE = EmNote_NONE;

   vex_state->guest_SETC = 0;

   /* These should not ever be either read or written, but we
      initialise them anyway. */
   vex_state->guest_CMSTART = 0;
   vex_state->guest_CMLEN   = 0;

   vex_state->guest_NRADDR   = 0;
   vex_state->guest_SC_CLASS = 0;
   vex_state->guest_GS_CONST = 0;

   vex_state->guest_IP_AT_SYSCALL = 0;
   vex_state->pad1 = 0;
}
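
/* Illustration only, not part of VEX: the minimal client-side setup
   this initialiser is intended for -- neutralise the whole state,
   then fill in whatever the client actually cares about.  The concrete
   register values and the function name are invented for this
   example. */
#if 0
static void example_client_setup ( VexGuestAMD64State* st )
{
   LibVEX_GuestAMD64_initialise(st);
   st->guest_RIP = 0x400000;   /* hypothetical entry point */
   st->guest_RSP = 0x7fff0000; /* hypothetical initial stack pointer */
}
#endif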

/* Figure out if any part of the guest state contained in minoff
   .. maxoff requires precise memory exceptions.  If in doubt return
   True (but this generates significantly slower code).

   By default we enforce precise exns for guest %RSP, %RBP and %RIP
   only.  These are the minimum needed to extract correct stack
   backtraces from amd64 code.

   Only %RSP is needed in mode VexRegUpdSpAtMemAccess.
*/
Bool guest_amd64_state_requires_precise_mem_exns (
        Int minoff, Int maxoff, VexRegisterUpdates pxControl
     )
{
   Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP);
   Int rbp_max = rbp_min + 8 - 1;
   Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP);
   Int rsp_max = rsp_min + 8 - 1;
   Int rip_min = offsetof(VexGuestAMD64State, guest_RIP);
   Int rip_max = rip_min + 8 - 1;

   if (maxoff < rsp_min || minoff > rsp_max) {
      /* no overlap with rsp */
      if (pxControl == VexRegUpdSpAtMemAccess)
         return False; // We only need to check stack pointer.
   } else {
      return True;
   }

   if (maxoff < rbp_min || minoff > rbp_max) {
      /* no overlap with rbp */
   } else {
      return True;
   }

   if (maxoff < rip_min || minoff > rip_max) {
      /* no overlap with eip */
   } else {
      return True;
   }

   return False;
}
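
/* Illustration only, not part of VEX: with pxControl set to
   VexRegUpdUnwindregsAtMemAccess, a store that touches any byte of
   guest_RSP, guest_RBP or guest_RIP requires precise exceptions,
   while one that only touches (say) guest_RAX does not.  The function
   name below is invented for this example. */
#if 0
static void example_precise_exns_query ( void )
{
   Int rsp = offsetof(VexGuestAMD64State, guest_RSP);
   Int rax = offsetof(VexGuestAMD64State, guest_RAX);
   Bool needsPrecise = guest_amd64_state_requires_precise_mem_exns(
                          rsp, rsp + 7, VexRegUpdUnwindregsAtMemAccess );
   Bool noNeed       = guest_amd64_state_requires_precise_mem_exns(
                          rax, rax + 7, VexRegUpdUnwindregsAtMemAccess );
   vassert(needsPrecise == True && noNeed == False);
}
#endif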

#define ALWAYSDEFD(field)                             \
    { offsetof(VexGuestAMD64State, field),            \
      (sizeof ((VexGuestAMD64State*)0)->field) }

VexGuestLayout
   amd64guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestAMD64State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestAMD64State,guest_RSP),
          .sizeof_SP = 8,

          /* Describe the frame pointer. */
          .offset_FP = offsetof(VexGuestAMD64State,guest_RBP),
          .sizeof_FP = 8,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestAMD64State,guest_RIP),
          .sizeof_IP = 8,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'. */
          .n_alwaysDefd = 16,

          /* flags thunk: OP and NDEP are always defd, whereas DEP1
             and DEP2 have to be tracked.  See detailed comment in
             gdefs.h on meaning of thunk fields. */
          .alwaysDefd
             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
                 /*  4 */ ALWAYSDEFD(guest_RIP),
                 /*  5 */ ALWAYSDEFD(guest_FS_CONST),
                 /*  6 */ ALWAYSDEFD(guest_FTOP),
                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
                 /*  9 */ ALWAYSDEFD(guest_FC3210),
                 // /*    */ ALWAYSDEFD(guest_CS),
                 // /*    */ ALWAYSDEFD(guest_DS),
                 // /*    */ ALWAYSDEFD(guest_ES),
                 // /*    */ ALWAYSDEFD(guest_FS),
                 // /*    */ ALWAYSDEFD(guest_GS),
                 // /*    */ ALWAYSDEFD(guest_SS),
                 // /*    */ ALWAYSDEFD(guest_LDT),
                 // /*    */ ALWAYSDEFD(guest_GDT),
                 /* 10 */ ALWAYSDEFD(guest_EMNOTE),
                 /* 11 */ ALWAYSDEFD(guest_SSEROUND),
                 /* 12 */ ALWAYSDEFD(guest_CMSTART),
                 /* 13 */ ALWAYSDEFD(guest_CMLEN),
                 /* 14 */ ALWAYSDEFD(guest_SC_CLASS),
                 /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
               }
        };

/*---------------------------------------------------------------*/
/*--- end                                guest_amd64_helpers.c ---*/
/*---------------------------------------------------------------*/