2 /*---------------------------------------------------------------*/
3 /*--- begin guest_amd64_helpers.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 #include "libvex_basictypes.h"
35 #include "libvex_emnote.h"
36 #include "libvex_guest_amd64.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
40 #include "main_util.h"
41 #include "main_globals.h"
42 #include "guest_generic_bb_to_IR.h"
43 #include "guest_amd64_defs.h"
44 #include "guest_generic_x87.h"
47 /* This file contains helper functions for amd64 guest code.
48 Calls to these functions are generated by the back end.
49 These calls are of course in the host machine code and
50 this file will be compiled to host machine code, so that
51 all makes sense.
53 Only change the signatures of these helper functions very
54 carefully. If you change the signature here, you'll have to change
55 the parameters passed to it in the IR calls constructed by
56 guest-amd64/toIR.c.
58 The convention used is that all functions called from generated
59 code are named amd64g_<something>, and any function whose name lacks
60 that prefix is not called from generated code. Note that some
61 LibVEX_* functions can however be called by VEX's client, but that
62 is not the same as calling them from VEX-generated code.
66 /* Set to 1 to get detailed profiling info about use of the flag
67 machinery. */
68 #define PROFILE_RFLAGS 0
71 /*---------------------------------------------------------------*/
72 /*--- %rflags run-time helpers. ---*/
73 /*---------------------------------------------------------------*/
75 /* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
76 after imulq/mulq. */
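/* Both routines below split each operand into 32-bit halves and use the
   schoolbook identity
      (2^32*u1 + u0) * (2^32*v1 + v0)
         = 2^64*(u1*v1) + 2^32*(u1*v0 + u0*v1) + (u0*v0)
   to obtain the high 64 bits of the 128-bit product; the low 64 bits are
   simply the truncated 64-bit product, computed directly at the end. */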
78 static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
80 const Long halfMask = 0xFFFFFFFFLL;
81 ULong u0, v0, w0;
82 Long u1, v1, w1, w2, t;
83 u0 = u & halfMask;
84 u1 = u >> 32;
85 v0 = v & halfMask;
86 v1 = v >> 32;
87 w0 = u0 * v0;
88 t = u1 * v0 + (w0 >> 32);
89 w1 = t & halfMask;
90 w2 = t >> 32;
91 w1 = u0 * v1 + w1;
92 *rHi = u1 * v1 + w2 + (w1 >> 32);
93 *rLo = (Long)((ULong)u * (ULong)v);
96 static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
98 const ULong halfMask = 0xFFFFFFFFULL;
99 ULong u0, v0, w0;
100 ULong u1, v1, w1,w2,t;
101 u0 = u & halfMask;
102 u1 = u >> 32;
103 v0 = v & halfMask;
104 v1 = v >> 32;
105 w0 = u0 * v0;
106 t = u1 * v0 + (w0 >> 32);
107 w1 = t & halfMask;
108 w2 = t >> 32;
109 w1 = u0 * v1 + w1;
110 *rHi = u1 * v1 + w2 + (w1 >> 32);
111 *rLo = u * v;
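/* Lookup table for the parity flag: parity_table[b] is AMD64G_CC_MASK_P
   when the byte b contains an even number of 1 bits, and 0 otherwise,
   matching the hardware definition of PF (which considers only the low
   8 bits of a result). */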
115 static const UChar parity_table[256] = {
116 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
117 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
118 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
119 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
120 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
121 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
122 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
123 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
124 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
125 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
126 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
127 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
128 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
129 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
130 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
131 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
132 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
133 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
134 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
135 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
136 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
137 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
138 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
139 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
140 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
141 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
142 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
143 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
144 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
145 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
146 AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
147 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
150 /* generalised left-shifter */
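/* A negative shift amount means a right shift by -n.  The flag macros
   below rely on this to move a result's top bit into the SF (bit 7) or
   OF (bit 11) position with a single call, whatever DATA_BITS is. */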
151 static inline Long lshift ( Long x, Int n )
153 if (n >= 0)
154 return (ULong)x << n;
155 else
156 return x >> (-n);
159 /* identity on ULong */
160 static inline ULong idULong ( ULong x )
162 return x;
166 #define PREAMBLE(__data_bits) \
167 /* const */ ULong DATA_MASK \
168 = __data_bits==8 \
169 ? 0xFFULL \
170 : (__data_bits==16 \
171 ? 0xFFFFULL \
172 : (__data_bits==32 \
173 ? 0xFFFFFFFFULL \
174 : 0xFFFFFFFFFFFFFFFFULL)); \
175 /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1); \
176 /* const */ ULong CC_DEP1 = cc_dep1_formal; \
177 /* const */ ULong CC_DEP2 = cc_dep2_formal; \
178 /* const */ ULong CC_NDEP = cc_ndep_formal; \
179 /* Four bogus assignments, which hopefully gcc can */ \
180 /* optimise away, and which stop it complaining about */ \
181 /* unused variables. */ \
182 SIGN_MASK = SIGN_MASK; \
183 DATA_MASK = DATA_MASK; \
184 CC_DEP2 = CC_DEP2; \
185 CC_NDEP = CC_NDEP;
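/* The thunk convention: CC_OP says which instruction class set the flags;
   CC_DEP1/CC_DEP2 hold its operands or result, and CC_NDEP carries the
   previous flags where they are still needed (ADC, SBB, INC, DEC, the
   rotates, ADCX/ADOX).  The ACTIONS_* macros below assemble an rflags
   value using the architectural bit positions -- CF=bit 0, PF=bit 2,
   AF=bit 4, ZF=bit 6, SF=bit 7, OF=bit 11 -- hence the "<< 6", "& 0x80",
   "& 0x10" and "<< 11" idioms. */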
188 /*-------------------------------------------------------------*/
190 #define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \
192 PREAMBLE(DATA_BITS); \
193 { ULong cf, pf, af, zf, sf, of; \
194 ULong argL, argR, res; \
195 argL = CC_DEP1; \
196 argR = CC_DEP2; \
197 res = argL + argR; \
198 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
199 pf = parity_table[(UChar)res]; \
200 af = (res ^ argL ^ argR) & 0x10; \
201 zf = ((DATA_UTYPE)res == 0) << 6; \
202 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
203 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
204 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
205 return cf | pf | af | zf | sf | of; \
209 /*-------------------------------------------------------------*/
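/* A note on the OF computation in ACTIONS_ADD above and ACTIONS_SUB below:
   for res = argL + argR, signed overflow occurs exactly when both operands
   have the same sign and the result's sign differs, i.e. when bit
   (DATA_BITS-1) of (argL ^ argR ^ -1) & (argL ^ res) is set; for
   res = argL - argR the corresponding term is (argL ^ argR) & (argL ^ res).
   The lshift(..., 12 - DATA_BITS) then moves that bit to bit 11, the OF
   position. */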
211 #define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \
213 PREAMBLE(DATA_BITS); \
214 { ULong cf, pf, af, zf, sf, of; \
215 ULong argL, argR, res; \
216 argL = CC_DEP1; \
217 argR = CC_DEP2; \
218 res = argL - argR; \
219 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
220 pf = parity_table[(UChar)res]; \
221 af = (res ^ argL ^ argR) & 0x10; \
222 zf = ((DATA_UTYPE)res == 0) << 6; \
223 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
224 of = lshift((argL ^ argR) & (argL ^ res), \
225 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
226 return cf | pf | af | zf | sf | of; \
230 /*-------------------------------------------------------------*/
232 #define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \
234 PREAMBLE(DATA_BITS); \
235 { ULong cf, pf, af, zf, sf, of; \
236 ULong argL, argR, oldC, res; \
237 oldC = CC_NDEP & AMD64G_CC_MASK_C; \
238 argL = CC_DEP1; \
239 argR = CC_DEP2 ^ oldC; \
240 res = (argL + argR) + oldC; \
241 if (oldC) \
242 cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \
243 else \
244 cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
245 pf = parity_table[(UChar)res]; \
246 af = (res ^ argL ^ argR) & 0x10; \
247 zf = ((DATA_UTYPE)res == 0) << 6; \
248 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
249 of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
250 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
251 return cf | pf | af | zf | sf | of; \
255 /*-------------------------------------------------------------*/
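/* For ADC (above) and SBB (below), the thunk stores argR already xor'd
   with the old carry bit, so the first step is to xor it back out.  The
   carry/borrow test depends on the carry-in: with oldC == 1 the boundary
   case (equality at the given width) can only arise via a wrap-around and
   must count as a carry, hence "<=" rather than "<". */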
257 #define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \
259 PREAMBLE(DATA_BITS); \
260 { ULong cf, pf, af, zf, sf, of; \
261 ULong argL, argR, oldC, res; \
262 oldC = CC_NDEP & AMD64G_CC_MASK_C; \
263 argL = CC_DEP1; \
264 argR = CC_DEP2 ^ oldC; \
265 res = (argL - argR) - oldC; \
266 if (oldC) \
267 cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR; \
268 else \
269 cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
270 pf = parity_table[(UChar)res]; \
271 af = (res ^ argL ^ argR) & 0x10; \
272 zf = ((DATA_UTYPE)res == 0) << 6; \
273 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
274 of = lshift((argL ^ argR) & (argL ^ res), \
275 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
276 return cf | pf | af | zf | sf | of; \
280 /*-------------------------------------------------------------*/
282 #define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \
284 PREAMBLE(DATA_BITS); \
285 { ULong cf, pf, af, zf, sf, of; \
286 cf = 0; \
287 pf = parity_table[(UChar)CC_DEP1]; \
288 af = 0; \
289 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
290 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
291 of = 0; \
292 return cf | pf | af | zf | sf | of; \
296 /*-------------------------------------------------------------*/
298 #define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \
300 PREAMBLE(DATA_BITS); \
301 { ULong cf, pf, af, zf, sf, of; \
302 ULong argL, argR, res; \
303 res = CC_DEP1; \
304 argL = res - 1; \
305 argR = 1; \
306 cf = CC_NDEP & AMD64G_CC_MASK_C; \
307 pf = parity_table[(UChar)res]; \
308 af = (res ^ argL ^ argR) & 0x10; \
309 zf = ((DATA_UTYPE)res == 0) << 6; \
310 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
311 of = ((res & DATA_MASK) == SIGN_MASK) << 11; \
312 return cf | pf | af | zf | sf | of; \
316 /*-------------------------------------------------------------*/
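/* INC (above) and DEC (below) do not modify CF, so the current CF is
   recovered from CC_NDEP (the flags before the operation).  OF is set
   exactly when the result lands on the signed boundary: 0x80...0 for INC
   (the operand was the most positive value), 0x7F...F for DEC (the
   operand was the most negative value). */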
318 #define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \
320 PREAMBLE(DATA_BITS); \
321 { ULong cf, pf, af, zf, sf, of; \
322 ULong argL, argR, res; \
323 res = CC_DEP1; \
324 argL = res + 1; \
325 argR = 1; \
326 cf = CC_NDEP & AMD64G_CC_MASK_C; \
327 pf = parity_table[(UChar)res]; \
328 af = (res ^ argL ^ argR) & 0x10; \
329 zf = ((DATA_UTYPE)res == 0) << 6; \
330 sf = lshift(res, 8 - DATA_BITS) & 0x80; \
331 of = ((res & DATA_MASK) \
332 == ((ULong)SIGN_MASK - 1)) << 11; \
333 return cf | pf | af | zf | sf | of; \
337 /*-------------------------------------------------------------*/
339 #define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \
341 PREAMBLE(DATA_BITS); \
342 { ULong cf, pf, af, zf, sf, of; \
343 cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C; \
344 pf = parity_table[(UChar)CC_DEP1]; \
345 af = 0; /* undefined */ \
346 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
347 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
348 /* of is defined if shift count == 1 */ \
349 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
350 & AMD64G_CC_MASK_O; \
351 return cf | pf | af | zf | sf | of; \
355 /*-------------------------------------------------------------*/
357 #define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \
359 PREAMBLE(DATA_BITS); \
360 { ULong cf, pf, af, zf, sf, of; \
361 cf = CC_DEP2 & 1; \
362 pf = parity_table[(UChar)CC_DEP1]; \
363 af = 0; /* undefined */ \
364 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
365 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
366 /* of is defined if shift count == 1 */ \
367 of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
368 & AMD64G_CC_MASK_O; \
369 return cf | pf | af | zf | sf | of; \
373 /*-------------------------------------------------------------*/
375 /* ROL: cf' = lsb(result). of' = msb(result) ^ lsb(result). */
376 /* DEP1 = result, NDEP = old flags */
377 #define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \
379 PREAMBLE(DATA_BITS); \
380 { ULong fl \
381 = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \
382 | (AMD64G_CC_MASK_C & CC_DEP1) \
383 | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \
384 11-(DATA_BITS-1)) \
385 ^ lshift(CC_DEP1, 11))); \
386 return fl; \
390 /*-------------------------------------------------------------*/
392 /* ROR: cf' = msb(result). of' = msb(result) ^ msb-1(result). */
393 /* DEP1 = result, NDEP = old flags */
394 #define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \
396 PREAMBLE(DATA_BITS); \
397 { ULong fl \
398 = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \
399 | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \
400 | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \
401 11-(DATA_BITS-1)) \
402 ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \
403 return fl; \
407 /*-------------------------------------------------------------*/
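/* The two rotate cases above only recompute C and O; S, Z, A and P are
   left exactly as they were, which is why the old flags in CC_NDEP are
   masked and merged back in rather than being rebuilt. */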
409 #define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU, \
410 DATA_U2TYPE, NARROWto2U) \
412 PREAMBLE(DATA_BITS); \
413 { ULong cf, pf, af, zf, sf, of; \
414 DATA_UTYPE hi; \
415 DATA_UTYPE lo \
416 = NARROWtoU( ((DATA_UTYPE)CC_DEP1) \
417 * ((DATA_UTYPE)CC_DEP2) ); \
418 DATA_U2TYPE rr \
419 = NARROWto2U( \
420 ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1)) \
421 * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) ); \
422 hi = NARROWtoU(rr >>/*u*/ DATA_BITS); \
423 cf = (hi != 0); \
424 pf = parity_table[(UChar)lo]; \
425 af = 0; /* undefined */ \
426 zf = (lo == 0) << 6; \
427 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
428 of = cf << 11; \
429 return cf | pf | af | zf | sf | of; \
433 /*-------------------------------------------------------------*/
435 #define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS, \
436 DATA_S2TYPE, NARROWto2S) \
438 PREAMBLE(DATA_BITS); \
439 { ULong cf, pf, af, zf, sf, of; \
440 DATA_STYPE hi; \
441 DATA_STYPE lo \
442 = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1) \
443 * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) ); \
444 DATA_S2TYPE rr \
445 = NARROWto2S( \
446 ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \
447 * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) ); \
448 hi = NARROWtoS(rr >>/*s*/ DATA_BITS); \
449 cf = (hi != (lo >>/*s*/ (DATA_BITS-1))); \
450 pf = parity_table[(UChar)lo]; \
451 af = 0; /* undefined */ \
452 zf = (lo == 0) << 6; \
453 sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
454 of = cf << 11; \
455 return cf | pf | af | zf | sf | of; \
459 /*-------------------------------------------------------------*/
461 #define ACTIONS_UMULQ \
463 PREAMBLE(64); \
464 { ULong cf, pf, af, zf, sf, of; \
465 ULong lo, hi; \
466 mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo ); \
467 cf = (hi != 0); \
468 pf = parity_table[(UChar)lo]; \
469 af = 0; /* undefined */ \
470 zf = (lo == 0) << 6; \
471 sf = lshift(lo, 8 - 64) & 0x80; \
472 of = cf << 11; \
473 return cf | pf | af | zf | sf | of; \
477 /*-------------------------------------------------------------*/
479 #define ACTIONS_SMULQ \
481 PREAMBLE(64); \
482 { ULong cf, pf, af, zf, sf, of; \
483 Long lo, hi; \
484 mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo ); \
485 cf = (hi != (lo >>/*s*/ (64-1))); \
486 pf = parity_table[(UChar)lo]; \
487 af = 0; /* undefined */ \
488 zf = (lo == 0) << 6; \
489 sf = lshift(lo, 8 - 64) & 0x80; \
490 of = cf << 11; \
491 return cf | pf | af | zf | sf | of; \
495 /*-------------------------------------------------------------*/
497 #define ACTIONS_ANDN(DATA_BITS,DATA_UTYPE) \
499 PREAMBLE(DATA_BITS); \
500 { ULong cf, pf, af, zf, sf, of; \
501 cf = 0; \
502 pf = 0; \
503 af = 0; \
504 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
505 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
506 of = 0; \
507 return cf | pf | af | zf | sf | of; \
511 /*-------------------------------------------------------------*/
513 #define ACTIONS_BLSI(DATA_BITS,DATA_UTYPE) \
515 PREAMBLE(DATA_BITS); \
516 { ULong cf, pf, af, zf, sf, of; \
517 cf = ((DATA_UTYPE)CC_DEP2 != 0); \
518 pf = 0; \
519 af = 0; \
520 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
521 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
522 of = 0; \
523 return cf | pf | af | zf | sf | of; \
527 /*-------------------------------------------------------------*/
529 #define ACTIONS_BLSMSK(DATA_BITS,DATA_UTYPE) \
531 PREAMBLE(DATA_BITS); \
532 { Long cf, pf, af, zf, sf, of; \
533 cf = ((DATA_UTYPE)CC_DEP2 == 0); \
534 pf = 0; \
535 af = 0; \
536 zf = 0; \
537 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
538 of = 0; \
539 return cf | pf | af | zf | sf | of; \
543 /*-------------------------------------------------------------*/
545 #define ACTIONS_BLSR(DATA_BITS,DATA_UTYPE) \
547 PREAMBLE(DATA_BITS); \
548 { ULong cf, pf, af, zf, sf, of; \
549 cf = ((DATA_UTYPE)CC_DEP2 == 0); \
550 pf = 0; \
551 af = 0; \
552 zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
553 sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
554 of = 0; \
555 return cf | pf | af | zf | sf | of; \
559 /*-------------------------------------------------------------*/
561 #define ACTIONS_ADX(DATA_BITS,DATA_UTYPE,FLAGNAME) \
563 PREAMBLE(DATA_BITS); \
564 { ULong ocf; /* o or c */ \
565 ULong argL, argR, oldOC, res; \
566 oldOC = (CC_NDEP >> AMD64G_CC_SHIFT_##FLAGNAME) & 1; \
567 argL = CC_DEP1; \
568 argR = CC_DEP2 ^ oldOC; \
569 res = (argL + argR) + oldOC; \
570 if (oldOC) \
571 ocf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \
572 else \
573 ocf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
574 return (CC_NDEP & ~AMD64G_CC_MASK_##FLAGNAME) \
575 | (ocf << AMD64G_CC_SHIFT_##FLAGNAME); \
579 /*-------------------------------------------------------------*/
582 #if PROFILE_RFLAGS
584 static Bool initted = False;
586 /* C flag, fast route */
587 static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
588 /* C flag, slow route */
589 static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
590 /* table for calculate_cond */
591 static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
592 /* total entry counts for calc_all, calc_c, calc_cond. */
593 static UInt n_calc_all = 0;
594 static UInt n_calc_c = 0;
595 static UInt n_calc_cond = 0;
597 #define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
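/* i.e. dump the counters roughly every 2^22 (~4.2 million) calls. */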
600 static void showCounts ( void )
602 Int op, co;
603 HChar ch;
604 vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n",
605 n_calc_all, n_calc_cond, n_calc_c);
607 vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE"
608 " S NS P NP L NL LE NLE\n");
609 vex_printf(" -----------------------------------------------------"
610 "----------------------------------------\n");
611 for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
613 ch = ' ';
614 if (op > 0 && (op-1) % 4 == 0)
615 ch = 'B';
616 if (op > 0 && (op-1) % 4 == 1)
617 ch = 'W';
618 if (op > 0 && (op-1) % 4 == 2)
619 ch = 'L';
620 if (op > 0 && (op-1) % 4 == 3)
621 ch = 'Q';
623 vex_printf("%2d%c: ", op, ch);
624 vex_printf("%6u ", tabc_slow[op]);
625 vex_printf("%6u ", tabc_fast[op]);
626 for (co = 0; co < 16; co++) {
627 Int n = tab_cond[op][co];
628 if (n >= 1000) {
629 vex_printf(" %3dK", n / 1000);
630 } else
631 if (n >= 0) {
632 vex_printf(" %3d ", n );
633 } else {
634 vex_printf(" ");
637 vex_printf("\n");
639 vex_printf("\n");
642 static void initCounts ( void )
644 Int op, co;
645 initted = True;
646 for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
647 tabc_fast[op] = tabc_slow[op] = 0;
648 for (co = 0; co < 16; co++)
649 tab_cond[op][co] = 0;
653 #endif /* PROFILE_RFLAGS */
656 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
657 /* Calculate all the 6 flags from the supplied thunk parameters.
658 Worker function, not directly called from generated code. */
659 static
660 ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
661 ULong cc_dep1_formal,
662 ULong cc_dep2_formal,
663 ULong cc_ndep_formal )
665 switch (cc_op) {
666 case AMD64G_CC_OP_COPY:
667 return cc_dep1_formal
668 & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
669 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);
671 case AMD64G_CC_OP_ADDB: ACTIONS_ADD( 8, UChar );
672 case AMD64G_CC_OP_ADDW: ACTIONS_ADD( 16, UShort );
673 case AMD64G_CC_OP_ADDL: ACTIONS_ADD( 32, UInt );
674 case AMD64G_CC_OP_ADDQ: ACTIONS_ADD( 64, ULong );
676 case AMD64G_CC_OP_ADCB: ACTIONS_ADC( 8, UChar );
677 case AMD64G_CC_OP_ADCW: ACTIONS_ADC( 16, UShort );
678 case AMD64G_CC_OP_ADCL: ACTIONS_ADC( 32, UInt );
679 case AMD64G_CC_OP_ADCQ: ACTIONS_ADC( 64, ULong );
681 case AMD64G_CC_OP_SUBB: ACTIONS_SUB( 8, UChar );
682 case AMD64G_CC_OP_SUBW: ACTIONS_SUB( 16, UShort );
683 case AMD64G_CC_OP_SUBL: ACTIONS_SUB( 32, UInt );
684 case AMD64G_CC_OP_SUBQ: ACTIONS_SUB( 64, ULong );
686 case AMD64G_CC_OP_SBBB: ACTIONS_SBB( 8, UChar );
687 case AMD64G_CC_OP_SBBW: ACTIONS_SBB( 16, UShort );
688 case AMD64G_CC_OP_SBBL: ACTIONS_SBB( 32, UInt );
689 case AMD64G_CC_OP_SBBQ: ACTIONS_SBB( 64, ULong );
691 case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC( 8, UChar );
692 case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
693 case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt );
694 case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong );
696 case AMD64G_CC_OP_INCB: ACTIONS_INC( 8, UChar );
697 case AMD64G_CC_OP_INCW: ACTIONS_INC( 16, UShort );
698 case AMD64G_CC_OP_INCL: ACTIONS_INC( 32, UInt );
699 case AMD64G_CC_OP_INCQ: ACTIONS_INC( 64, ULong );
701 case AMD64G_CC_OP_DECB: ACTIONS_DEC( 8, UChar );
702 case AMD64G_CC_OP_DECW: ACTIONS_DEC( 16, UShort );
703 case AMD64G_CC_OP_DECL: ACTIONS_DEC( 32, UInt );
704 case AMD64G_CC_OP_DECQ: ACTIONS_DEC( 64, ULong );
706 case AMD64G_CC_OP_SHLB: ACTIONS_SHL( 8, UChar );
707 case AMD64G_CC_OP_SHLW: ACTIONS_SHL( 16, UShort );
708 case AMD64G_CC_OP_SHLL: ACTIONS_SHL( 32, UInt );
709 case AMD64G_CC_OP_SHLQ: ACTIONS_SHL( 64, ULong );
711 case AMD64G_CC_OP_SHRB: ACTIONS_SHR( 8, UChar );
712 case AMD64G_CC_OP_SHRW: ACTIONS_SHR( 16, UShort );
713 case AMD64G_CC_OP_SHRL: ACTIONS_SHR( 32, UInt );
714 case AMD64G_CC_OP_SHRQ: ACTIONS_SHR( 64, ULong );
716 case AMD64G_CC_OP_ROLB: ACTIONS_ROL( 8, UChar );
717 case AMD64G_CC_OP_ROLW: ACTIONS_ROL( 16, UShort );
718 case AMD64G_CC_OP_ROLL: ACTIONS_ROL( 32, UInt );
719 case AMD64G_CC_OP_ROLQ: ACTIONS_ROL( 64, ULong );
721 case AMD64G_CC_OP_RORB: ACTIONS_ROR( 8, UChar );
722 case AMD64G_CC_OP_RORW: ACTIONS_ROR( 16, UShort );
723 case AMD64G_CC_OP_RORL: ACTIONS_ROR( 32, UInt );
724 case AMD64G_CC_OP_RORQ: ACTIONS_ROR( 64, ULong );
726 case AMD64G_CC_OP_UMULB: ACTIONS_UMUL( 8, UChar, toUChar,
727 UShort, toUShort );
728 case AMD64G_CC_OP_UMULW: ACTIONS_UMUL( 16, UShort, toUShort,
729 UInt, toUInt );
730 case AMD64G_CC_OP_UMULL: ACTIONS_UMUL( 32, UInt, toUInt,
731 ULong, idULong );
733 case AMD64G_CC_OP_UMULQ: ACTIONS_UMULQ;
735 case AMD64G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, toUChar,
736 Short, toUShort );
737 case AMD64G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, toUShort,
738 Int, toUInt );
739 case AMD64G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, toUInt,
740 Long, idULong );
742 case AMD64G_CC_OP_SMULQ: ACTIONS_SMULQ;
744 case AMD64G_CC_OP_ANDN32: ACTIONS_ANDN( 32, UInt );
745 case AMD64G_CC_OP_ANDN64: ACTIONS_ANDN( 64, ULong );
747 case AMD64G_CC_OP_BLSI32: ACTIONS_BLSI( 32, UInt );
748 case AMD64G_CC_OP_BLSI64: ACTIONS_BLSI( 64, ULong );
750 case AMD64G_CC_OP_BLSMSK32: ACTIONS_BLSMSK( 32, UInt );
751 case AMD64G_CC_OP_BLSMSK64: ACTIONS_BLSMSK( 64, ULong );
753 case AMD64G_CC_OP_BLSR32: ACTIONS_BLSR( 32, UInt );
754 case AMD64G_CC_OP_BLSR64: ACTIONS_BLSR( 64, ULong );
756 case AMD64G_CC_OP_ADCX32: ACTIONS_ADX( 32, UInt, C );
757 case AMD64G_CC_OP_ADCX64: ACTIONS_ADX( 64, ULong, C );
759 case AMD64G_CC_OP_ADOX32: ACTIONS_ADX( 32, UInt, O );
760 case AMD64G_CC_OP_ADOX64: ACTIONS_ADX( 64, ULong, O );
762 default:
763 /* shouldn't really make these calls from generated code */
764 vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
765 "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
766 cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
767 vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
771 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
772 /* Calculate all the 6 flags from the supplied thunk parameters. */
773 ULong amd64g_calculate_rflags_all ( ULong cc_op,
774 ULong cc_dep1,
775 ULong cc_dep2,
776 ULong cc_ndep )
778 # if PROFILE_RFLAGS
779 if (!initted) initCounts();
780 n_calc_all++;
781 if (SHOW_COUNTS_NOW) showCounts();
782 # endif
783 return
784 amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
788 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
789 /* Calculate just the carry flag from the supplied thunk parameters. */
790 ULong amd64g_calculate_rflags_c ( ULong cc_op,
791 ULong cc_dep1,
792 ULong cc_dep2,
793 ULong cc_ndep )
795 # if PROFILE_RFLAGS
796 if (!initted) initCounts();
797 n_calc_c++;
798 tabc_fast[cc_op]++;
799 if (SHOW_COUNTS_NOW) showCounts();
800 # endif
802 /* Fast-case some common ones. */
803 switch (cc_op) {
804 case AMD64G_CC_OP_COPY:
805 return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
806 case AMD64G_CC_OP_LOGICQ:
807 case AMD64G_CC_OP_LOGICL:
808 case AMD64G_CC_OP_LOGICW:
809 case AMD64G_CC_OP_LOGICB:
810 return 0;
811 // case AMD64G_CC_OP_SUBL:
812 // return ((UInt)cc_dep1) < ((UInt)cc_dep2)
813 // ? AMD64G_CC_MASK_C : 0;
814 // case AMD64G_CC_OP_SUBW:
815 // return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
816 // ? AMD64G_CC_MASK_C : 0;
817 // case AMD64G_CC_OP_SUBB:
818 // return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
819 // ? AMD64G_CC_MASK_C : 0;
820 // case AMD64G_CC_OP_INCL:
821 // case AMD64G_CC_OP_DECL:
822 // return cc_ndep & AMD64G_CC_MASK_C;
823 default:
824 break;
827 # if PROFILE_RFLAGS
828 tabc_fast[cc_op]--;
829 tabc_slow[cc_op]++;
830 # endif
832 return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
833 & AMD64G_CC_MASK_C;
837 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
838 /* returns 1 or 0 */
839 ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
840 ULong cc_op,
841 ULong cc_dep1,
842 ULong cc_dep2,
843 ULong cc_ndep )
845 ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
846 cc_dep2, cc_ndep);
847 ULong of,sf,zf,cf,pf;
848 ULong inv = cond & 1;
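   /* The condition codes come in complementary pairs (O/NO, B/NB, Z/NZ,
      ...), with the low bit selecting the negated sense; each case below
      computes the positive condition and xors in `inv` to obtain the
      other member of the pair for free. */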
850 # if PROFILE_RFLAGS
851 if (!initted) initCounts();
852 tab_cond[cc_op][cond]++;
853 n_calc_cond++;
854 if (SHOW_COUNTS_NOW) showCounts();
855 # endif
857 switch (cond) {
858 case AMD64CondNO:
859 case AMD64CondO: /* OF == 1 */
860 of = rflags >> AMD64G_CC_SHIFT_O;
861 return 1 & (inv ^ of);
863 case AMD64CondNZ:
864 case AMD64CondZ: /* ZF == 1 */
865 zf = rflags >> AMD64G_CC_SHIFT_Z;
866 return 1 & (inv ^ zf);
868 case AMD64CondNB:
869 case AMD64CondB: /* CF == 1 */
870 cf = rflags >> AMD64G_CC_SHIFT_C;
871 return 1 & (inv ^ cf);
872 break;
874 case AMD64CondNBE:
875 case AMD64CondBE: /* (CF or ZF) == 1 */
876 cf = rflags >> AMD64G_CC_SHIFT_C;
877 zf = rflags >> AMD64G_CC_SHIFT_Z;
878 return 1 & (inv ^ (cf | zf));
879 break;
881 case AMD64CondNS:
882 case AMD64CondS: /* SF == 1 */
883 sf = rflags >> AMD64G_CC_SHIFT_S;
884 return 1 & (inv ^ sf);
886 case AMD64CondNP:
887 case AMD64CondP: /* PF == 1 */
888 pf = rflags >> AMD64G_CC_SHIFT_P;
889 return 1 & (inv ^ pf);
891 case AMD64CondNL:
892 case AMD64CondL: /* (SF xor OF) == 1 */
893 sf = rflags >> AMD64G_CC_SHIFT_S;
894 of = rflags >> AMD64G_CC_SHIFT_O;
895 return 1 & (inv ^ (sf ^ of));
896 break;
898 case AMD64CondNLE:
899 case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */
900 sf = rflags >> AMD64G_CC_SHIFT_S;
901 of = rflags >> AMD64G_CC_SHIFT_O;
902 zf = rflags >> AMD64G_CC_SHIFT_Z;
903 return 1 & (inv ^ ((sf ^ of) | zf));
904 break;
906 default:
907 /* shouldn't really make these calls from generated code */
908 vex_printf("amd64g_calculate_condition"
909 "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
910 cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
911 vpanic("amd64g_calculate_condition");
916 /* VISIBLE TO LIBVEX CLIENT */
917 ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/const VexGuestAMD64State* vex_state )
919 ULong rflags = amd64g_calculate_rflags_all_WRK(
920 vex_state->guest_CC_OP,
921 vex_state->guest_CC_DEP1,
922 vex_state->guest_CC_DEP2,
923 vex_state->guest_CC_NDEP
925 Long dflag = vex_state->guest_DFLAG;
926 vassert(dflag == 1 || dflag == -1);
927 if (dflag == -1)
928 rflags |= (1<<10);
929 if (vex_state->guest_IDFLAG == 1)
930 rflags |= (1<<21);
931 if (vex_state->guest_ACFLAG == 1)
932 rflags |= (1<<18);
934 return rflags;
937 /* VISIBLE TO LIBVEX CLIENT */
938 void
939 LibVEX_GuestAMD64_put_rflags ( ULong rflags,
940 /*MOD*/VexGuestAMD64State* vex_state )
942 /* D flag */
943 if (rflags & AMD64G_CC_MASK_D) {
944 vex_state->guest_DFLAG = -1;
945 rflags &= ~AMD64G_CC_MASK_D;
947 else
948 vex_state->guest_DFLAG = 1;
950 /* ID flag */
951 if (rflags & AMD64G_CC_MASK_ID) {
952 vex_state->guest_IDFLAG = 1;
953 rflags &= ~AMD64G_CC_MASK_ID;
955 else
956 vex_state->guest_IDFLAG = 0;
958 /* AC flag */
959 if (rflags & AMD64G_CC_MASK_AC) {
960 vex_state->guest_ACFLAG = 1;
961 rflags &= ~AMD64G_CC_MASK_AC;
963 else
964 vex_state->guest_ACFLAG = 0;
966 UInt cc_mask = AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z |
967 AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P;
968 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
969 vex_state->guest_CC_DEP1 = rflags & cc_mask;
970 vex_state->guest_CC_DEP2 = 0;
971 vex_state->guest_CC_NDEP = 0;
974 /* VISIBLE TO LIBVEX CLIENT */
975 void
976 LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag,
977 /*MOD*/VexGuestAMD64State* vex_state )
979 ULong oszacp = amd64g_calculate_rflags_all_WRK(
980 vex_state->guest_CC_OP,
981 vex_state->guest_CC_DEP1,
982 vex_state->guest_CC_DEP2,
983 vex_state->guest_CC_NDEP
985 if (new_carry_flag & 1) {
986 oszacp |= AMD64G_CC_MASK_C;
987 } else {
988 oszacp &= ~AMD64G_CC_MASK_C;
990 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
991 vex_state->guest_CC_DEP1 = oszacp;
992 vex_state->guest_CC_DEP2 = 0;
993 vex_state->guest_CC_NDEP = 0;
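/* Purely illustrative sketch of how a LibVEX client might use the two
   calls above, assuming it already holds a populated guest state; the
   function name here is arbitrary and not part of the VEX API. */
#if 0
static void example_flag_roundtrip ( VexGuestAMD64State* st )
{
   ULong rf = LibVEX_GuestAMD64_get_rflags(st);
   vex_printf("guest rflags = 0x%llx\n", rf);
   /* Force CF to 1 without disturbing O/S/Z/A/P. */
   LibVEX_GuestAMD64_put_rflag_c(1, st);
}
#endif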
996 /*---------------------------------------------------------------*/
997 /*--- %rflags translation-time function specialisers. ---*/
998 /*--- These help iropt specialise calls the above run-time ---*/
999 /*--- %rflags functions. ---*/
1000 /*---------------------------------------------------------------*/
1002 /* Used by the optimiser to try specialisations. Returns an
1003 equivalent expression, or NULL if none. */
1005 static inline Bool isU64 ( IRExpr* e, ULong n )
1007 return e->tag == Iex_Const
1008 && e->Iex.Const.con->tag == Ico_U64
1009 && e->Iex.Const.con->Ico.U64 == n;
1012 /* Returns N if W64 is a value of the form 1 << N for N in 1 to 31,
1013 and zero in any other case. */
1014 static Int isU64_1_shl_N_literal ( ULong w64 )
1016 if (w64 < (1ULL << 1) || w64 > (1ULL << 31))
1017 return 0;
1018 if ((w64 & (w64 - 1)) != 0)
1019 return 0;
1020 /* At this point, we know w64 is a power of two in the range 2^1 .. 2^31,
1021 and we only need to find out which one it is. */
1022 for (Int n = 1; n <= 31; n++) {
1023 if (w64 == (1ULL << n))
1024 return n;
1026 /* Consequently we should never get here. */
1027 /*UNREACHED*/
1028 vassert(0);
1029 return 0;
1032 /* Returns N if E is an immediate of the form 1 << N for N in 1 to 31,
1033 and zero in any other case. */
1034 static Int isU64_1_shl_N ( IRExpr* e )
1036 if (e->tag != Iex_Const || e->Iex.Const.con->tag != Ico_U64)
1037 return 0;
1038 ULong w64 = e->Iex.Const.con->Ico.U64;
1039 return isU64_1_shl_N_literal(w64);
1042 /* Returns N if E is an immediate of the form (1 << N) - 1 for N in 1 to 31,
1043 and zero in any other case. */
1044 static Int isU64_1_shl_N_minus_1 ( IRExpr* e )
1046 if (e->tag != Iex_Const || e->Iex.Const.con->tag != Ico_U64)
1047 return 0;
1048 ULong w64 = e->Iex.Const.con->Ico.U64;
1049 // This isn't actually necessary since isU64_1_shl_N_literal will return
1050 // zero given a zero argument, but still ..
1051 if (w64 == 0xFFFFFFFFFFFFFFFFULL)
1052 return 0;
1053 return isU64_1_shl_N_literal(w64 + 1);
1056 IRExpr* guest_amd64_spechelper ( const HChar* function_name,
1057 IRExpr** args,
1058 IRStmt** precedingStmts,
1059 Int n_precedingStmts )
1061 # define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
1062 # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
1063 # define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
1064 # define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
1065 # define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
1066 # define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
1068 Int i, arity = 0;
1069 for (i = 0; args[i]; i++)
1070 arity++;
1071 # if 0
1072 vex_printf("spec request:\n");
1073 vex_printf(" %s ", function_name);
1074 for (i = 0; i < arity; i++) {
1075 vex_printf(" ");
1076 ppIRExpr(args[i]);
1078 vex_printf("\n");
1079 # endif
1081 /* --------- specialising "amd64g_calculate_condition" --------- */
1083 if (vex_streq(function_name, "amd64g_calculate_condition")) {
1084 /* specialise calls to above "calculate condition" function */
1085 IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
1086 vassert(arity == 5);
1087 cond = args[0];
1088 cc_op = args[1];
1089 cc_dep1 = args[2];
1090 cc_dep2 = args[3];
1092 /*---------------- ADDQ ----------------*/
1094 /* 4, */
1095 if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
1096 /* long long add, then Z --> test (dst+src == 0) */
1097 return unop(Iop_1Uto64,
1098 binop(Iop_CmpEQ64,
1099 binop(Iop_Add64, cc_dep1, cc_dep2),
1100 mkU64(0)));
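      /* This is the pattern used throughout this function: match a
         (cc_op, cond) pair of constants via isU64 and return an equivalent
         flag-free expression built from ordinary IR primitives.
         Iop_1Uto64 widens the 1-bit comparison result to the 64-bit value
         the helper call would have produced; where no case matches, the
         function returns NULL and the original helper call is kept. */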
1103 /* 8, */
1104 if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondS)) {
1105 /* long long add, then S (negative)
1106 --> (dst+src)[63]
1107 --> ((dst + src) >>u 63) & 1
1109 return binop(Iop_And64,
1110 binop(Iop_Shr64,
1111 binop(Iop_Add64, cc_dep1, cc_dep2),
1112 mkU8(63)),
1113 mkU64(1));
1116 /*---------------- ADDL ----------------*/
1118 /* 0, */
1119 if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondO)) {
1120 /* This is very commonly generated by Javascript JITs, for
1121 the idiom "do a 32-bit add and jump to out-of-line code if
1122 an overflow occurs". */
1123 /* long add, then O (overflow)
1124 --> ((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 + dep2)))[31]
1125 --> (((dep1 ^ dep2 ^ -1) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
1126 --> (((not(dep1 ^ dep2)) & (dep1 ^ (dep1 +64 dep2))) >>u 31) & 1
1128 vassert(isIRAtom(cc_dep1));
1129 vassert(isIRAtom(cc_dep2));
1130 return
1131 binop(Iop_And64,
1132 binop(Iop_Shr64,
1133 binop(Iop_And64,
1134 unop(Iop_Not64,
1135 binop(Iop_Xor64, cc_dep1, cc_dep2)),
1136 binop(Iop_Xor64,
1137 cc_dep1,
1138 binop(Iop_Add64, cc_dep1, cc_dep2))),
1139 mkU8(31)),
1140 mkU64(1));
1144 /* 4, */
1145 if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondZ)) {
1146 /* long add, then Z --> test ((int)(dst+src) == 0) */
1147 return unop(Iop_1Uto64,
1148 binop(Iop_CmpEQ32,
1149 unop(Iop_64to32, binop(Iop_Add64, cc_dep1, cc_dep2)),
1150 mkU32(0)));
1153 /* 8, 9 */
1154 if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondS)) {
1155 /* long add, then S (negative)
1156 --> (dst+src)[31]
1157 --> ((dst +64 src) >>u 31) & 1
1158 Pointless to narrow the args to 32 bit before the add. */
1159 return binop(Iop_And64,
1160 binop(Iop_Shr64,
1161 binop(Iop_Add64, cc_dep1, cc_dep2),
1162 mkU8(31)),
1163 mkU64(1));
1165 if (isU64(cc_op, AMD64G_CC_OP_ADDL) && isU64(cond, AMD64CondNS)) {
1166 /* long add, then NS (not negative)
1167 --> (dst+src)[31] ^ 1
1168 --> (((dst +64 src) >>u 31) & 1) ^ 1
1169 Pointless to narrow the args to 32 bit before the add. */
1170 return binop(Iop_Xor64,
1171 binop(Iop_And64,
1172 binop(Iop_Shr64,
1173 binop(Iop_Add64, cc_dep1, cc_dep2),
1174 mkU8(31)),
1175 mkU64(1)),
1176 mkU64(1));
1179 /*---------------- ADDW ----------------*/
1181 /* 4, */
1182 if (isU64(cc_op, AMD64G_CC_OP_ADDW) && isU64(cond, AMD64CondZ)) {
1184 /* word add, then Z --> test ((short)(dst+src) == 0) */
1185 return unop(Iop_1Uto64,
1186 binop(Iop_CmpEQ16,
1187 unop(Iop_64to16, binop(Iop_Add64, cc_dep1, cc_dep2)),
1188 mkU16(0)));
1191 /*---------------- ADDB ----------------*/
1193 /* 4, */
1194 if (isU64(cc_op, AMD64G_CC_OP_ADDB) && isU64(cond, AMD64CondZ)) {
1195 /* byte add, then Z --> test ((char)(dst+src) == 0) */
1196 return unop(Iop_1Uto64,
1197 binop(Iop_CmpEQ8,
1198 unop(Iop_64to8, binop(Iop_Add64, cc_dep1, cc_dep2)),
1199 mkU8(0)));
1202 /*---------------- SUBQ ----------------*/
1204 /* 0, */
1205 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondO)) {
1206 /* long long sub/cmp, then O (overflow)
1207 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[63]
1208 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2))) >>u 63
1210 vassert(isIRAtom(cc_dep1));
1211 vassert(isIRAtom(cc_dep2));
1212 return binop(Iop_Shr64,
1213 binop(Iop_And64,
1214 binop(Iop_Xor64, cc_dep1, cc_dep2),
1215 binop(Iop_Xor64,
1216 cc_dep1,
1217 binop(Iop_Sub64, cc_dep1, cc_dep2))),
1218 mkU8(63));
1220 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNO)) {
1221 /* No action. Never yet found a test case. */
1224 /* 2, 3 */
1225 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
1226 /* long long sub/cmp, then B (unsigned less than)
1227 --> test dst <u src */
1228 return unop(Iop_1Uto64,
1229 binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
1231 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
1232 /* long long sub/cmp, then NB (unsigned greater than or equal)
1233 --> test src <=u dst */
1234 /* Note, args are opposite way round from the usual */
1235 return unop(Iop_1Uto64,
1236 binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
1239 /* 4, 5 */
1240 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
1241 /* long long sub/cmp, then Z --> test dst==src */
1242 return unop(Iop_1Uto64,
1243 binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
1245 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
1246 /* long long sub/cmp, then NZ --> test dst!=src */
1247 return unop(Iop_1Uto64,
1248 binop(Iop_CmpNE64,cc_dep1,cc_dep2));
1251 /* 6, 7 */
1252 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
1253 /* long long sub/cmp, then BE (unsigned less than or equal)
1254 --> test dst <=u src */
1255 return unop(Iop_1Uto64,
1256 binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
1258 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNBE)) {
1259 /* long long sub/cmp, then NBE (unsigned greater than)
1260 --> test !(dst <=u src) */
1261 return binop(Iop_Xor64,
1262 unop(Iop_1Uto64,
1263 binop(Iop_CmpLE64U, cc_dep1, cc_dep2)),
1264 mkU64(1));
1267 /* 8, 9 */
1268 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondS)) {
1269 /* long long sub/cmp, then S (negative)
1270 --> (dst-src)[63]
1271 --> (dst-src) >>u 63 */
1272 return binop(Iop_Shr64,
1273 binop(Iop_Sub64, cc_dep1, cc_dep2),
1274 mkU8(63));
1276 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNS)) {
1277 /* long long sub/cmp, then NS (not negative)
1278 --> (dst-src)[63] ^ 1
1279 --> ((dst-src) >>u 63) ^ 1 */
1280 return binop(Iop_Xor64,
1281 binop(Iop_Shr64,
1282 binop(Iop_Sub64, cc_dep1, cc_dep2),
1283 mkU8(63)),
1284 mkU64(1));
1287 /* 12, 13 */
1288 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
1289 /* long long sub/cmp, then L (signed less than)
1290 --> test dst <s src */
1291 return unop(Iop_1Uto64,
1292 binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
1294 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNL)) {
1295 /* long long sub/cmp, then NL (signed greater than or equal)
1296 --> test dst >=s src
1297 --> test src <=s dst */
1298 return unop(Iop_1Uto64,
1299 binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
1302 /* 14, 15 */
1303 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondLE)) {
1304 /* long long sub/cmp, then LE (signed less than or equal)
1305 --> test dst <=s src */
1306 return unop(Iop_1Uto64,
1307 binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
1309 if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNLE)) {
1310 /* long long sub/cmp, then NLE (signed greater than)
1311 --> test !(dst <=s src)
1312 --> test (dst >s src)
1313 --> test (src <s dst) */
1314 return unop(Iop_1Uto64,
1315 binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
1319 /*---------------- SUBL ----------------*/
1321 /* 0, */
1322 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondO)) {
1323 /* This is very commonly generated by Javascript JITs, for
1324 the idiom "do a 32-bit subtract and jump to out-of-line
1325 code if an overflow occurs". */
1326 /* long sub/cmp, then O (overflow)
1327 --> ((dep1 ^ dep2) & (dep1 ^ (dep1 - dep2)))[31]
1328 --> (((dep1 ^ dep2) & (dep1 ^ (dep1 -64 dep2))) >>u 31) & 1
1330 vassert(isIRAtom(cc_dep1));
1331 vassert(isIRAtom(cc_dep2));
1332 return
1333 binop(Iop_And64,
1334 binop(Iop_Shr64,
1335 binop(Iop_And64,
1336 binop(Iop_Xor64, cc_dep1, cc_dep2),
1337 binop(Iop_Xor64,
1338 cc_dep1,
1339 binop(Iop_Sub64, cc_dep1, cc_dep2))),
1340 mkU8(31)),
1341 mkU64(1));
1344 /* 1, */
1345 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNO)) {
1346 /* No action. Never yet found a test case. */
1349 /* 2, 3 */
1351 /* It appears that LLVM 5.0 and later have a new way to find out
1352 whether the top N bits of a word W are all zero, by computing
1354 W <u 0---(N-1)---0 1 0---0 or
1355 W <=u 0---(N-1)---0 0 1---1
1357 In particular, the result will be defined if the top N bits of W
1358 are defined, even if the trailing bits -- those corresponding to
1359 the rightmost 0---0 / 1---1 section -- are undefined. Rather than
1360 make Memcheck more complex, we detect this case where we can and
1361 shift out the irrelevant and potentially undefined bits. */
1362 Int n = 0;
1363 Bool is_NB_or_NBE = False;
1364 if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
1365 if (isU64(cond, AMD64CondB) || isU64(cond, AMD64CondNB)) {
1366 /* long sub/cmp, then B (unsigned less than),
1367 where dep2 is a power of 2:
1368 -> CmpLT32U(dep1, 1 << N)
1369 -> CmpEQ32(dep1 >>u N, 0)
1371 long sub/cmp, then NB (unsigned greater than or equal),
1372 where dep2 is a power of 2:
1373 -> CmpGE32U(dep1, 1 << N)
1374 -> CmpNE32(dep1 >>u N, 0)
1375 This avoids CmpLT32U/CmpGE32U being applied to potentially
1376 uninitialised bits in the area being shifted out. */
1377 n = isU64_1_shl_N(cc_dep2);
1378 is_NB_or_NBE = isU64(cond, AMD64CondNB);
1379 } else if (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE)) {
1380 /* long sub/cmp, then BE (unsigned less than or equal),
1381 where dep2 is a power of 2 minus 1:
1382 -> CmpLE32U(dep1, (1 << N) - 1)
1383 -> CmpEQ32(dep1 >>u N, 0)
1385 long sub/cmp, then NBE (unsigned greater than),
1386 where dep2 is a power of 2 minus 1:
1387 -> CmpGT32U(dep1, (1 << N) - 1)
1388 -> CmpNE32(dep1 >>u N, 0)
1389 This avoids CmpLE32U/CmpGT32U being applied to potentially
1390 uninitialised bits in the area being shifted out. */
1391 n = isU64_1_shl_N_minus_1(cc_dep2);
1392 is_NB_or_NBE = isU64(cond, AMD64CondNBE);
1395 if (n > 0) {
1396 vassert(n >= 1 && n <= 31);
1397 return unop(Iop_1Uto64,
1398 binop(is_NB_or_NBE ? Iop_CmpNE32 : Iop_CmpEQ32,
1399 binop(Iop_Shr32, unop(Iop_64to32, cc_dep1),
1400 mkU8(n)),
1401 mkU32(0)));
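      /* Worked example: "cmpl $0x100, %reg" followed by JB reaches this
         point with cc_dep2 == 0x100 == 1 << 8, so n == 8 and the condition
         becomes CmpEQ32(reg >>u 8, 0); the BE/NBE variant handles the
         equivalent comparison against 0xFF (== (1 << 8) - 1). */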
1404 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) {
1405 /* long sub/cmp, then B (unsigned less than)
1406 --> test dst <u src */
1407 return unop(Iop_1Uto64,
1408 binop(Iop_CmpLT32U,
1409 unop(Iop_64to32, cc_dep1),
1410 unop(Iop_64to32, cc_dep2)));
1412 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNB)) {
1413 /* long sub/cmp, then NB (unsigned greater than or equal)
1414 --> test src <=u dst */
1415 /* Note, args are opposite way round from the usual */
1416 return unop(Iop_1Uto64,
1417 binop(Iop_CmpLE32U,
1418 unop(Iop_64to32, cc_dep2),
1419 unop(Iop_64to32, cc_dep1)));
1422 /* 4, 5 */
1423 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
1424 /* long sub/cmp, then Z --> test dst==src */
1425 return unop(Iop_1Uto64,
1426 binop(Iop_CmpEQ32,
1427 unop(Iop_64to32, cc_dep1),
1428 unop(Iop_64to32, cc_dep2)));
1430 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
1431 /* long sub/cmp, then NZ --> test dst!=src */
1432 return unop(Iop_1Uto64,
1433 binop(Iop_CmpNE32,
1434 unop(Iop_64to32, cc_dep1),
1435 unop(Iop_64to32, cc_dep2)));
1438 /* 6, 7 */
1439 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
1440 /* long sub/cmp, then BE (unsigned less than or equal)
1441 --> test dst <=u src */
1442 return unop(Iop_1Uto64,
1443 binop(Iop_CmpLE32U,
1444 unop(Iop_64to32, cc_dep1),
1445 unop(Iop_64to32, cc_dep2)));
1447 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
1448 /* long sub/cmp, then NBE (unsigned greater than)
1449 --> test src <u dst */
1450 /* Note, args are opposite way round from the usual */
1451 return unop(Iop_1Uto64,
1452 binop(Iop_CmpLT32U,
1453 unop(Iop_64to32, cc_dep2),
1454 unop(Iop_64to32, cc_dep1)));
1457 /* 8, 9 */
1458 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
1459 /* long sub/cmp, then S (negative)
1460 --> (dst-src)[31]
1461 --> ((dst -64 src) >>u 31) & 1
1462 Pointless to narrow the args to 32 bit before the subtract. */
1463 return binop(Iop_And64,
1464 binop(Iop_Shr64,
1465 binop(Iop_Sub64, cc_dep1, cc_dep2),
1466 mkU8(31)),
1467 mkU64(1));
1469 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNS)) {
1470 /* long sub/cmp, then NS (not negative)
1471 --> (dst-src)[31] ^ 1
1472 --> (((dst -64 src) >>u 31) & 1) ^ 1
1473 Pointless to narrow the args to 32 bit before the subtract. */
1474 return binop(Iop_Xor64,
1475 binop(Iop_And64,
1476 binop(Iop_Shr64,
1477 binop(Iop_Sub64, cc_dep1, cc_dep2),
1478 mkU8(31)),
1479 mkU64(1)),
1480 mkU64(1));
1483 /* 12, 13 */
1484 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
1485 /* long sub/cmp, then L (signed less than)
1486 --> test dst <s src */
1487 return unop(Iop_1Uto64,
1488 binop(Iop_CmpLT32S,
1489 unop(Iop_64to32, cc_dep1),
1490 unop(Iop_64to32, cc_dep2)));
1492 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNL)) {
1493 /* long sub/cmp, then NL (signed greater than or equal)
1494 --> test dst >=s src
1495 --> test src <=s dst */
1496 return unop(Iop_1Uto64,
1497 binop(Iop_CmpLE32S,
1498 unop(Iop_64to32, cc_dep2),
1499 unop(Iop_64to32, cc_dep1)));
1502 /* 14, 15 */
1503 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
1504 /* long sub/cmp, then LE (signed less than or equal)
1505 --> test dst <=s src */
1506 return unop(Iop_1Uto64,
1507 binop(Iop_CmpLE32S,
1508 unop(Iop_64to32, cc_dep1),
1509 unop(Iop_64to32, cc_dep2)));
1512 if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
1513 /* long sub/cmp, then NLE (signed greater than)
1514 --> test !(dst <=s src)
1515 --> test (dst >s src)
1516 --> test (src <s dst) */
1517 return unop(Iop_1Uto64,
1518 binop(Iop_CmpLT32S,
1519 unop(Iop_64to32, cc_dep2),
1520 unop(Iop_64to32, cc_dep1)));
1524 /*---------------- SUBW ----------------*/
1526 /* 4, 5 */
1527 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
1528 /* word sub/cmp, then Z --> test dst==src */
1529 return unop(Iop_1Uto64,
1530 binop(Iop_CmpEQ16,
1531 unop(Iop_64to16,cc_dep1),
1532 unop(Iop_64to16,cc_dep2)));
1534 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
1535 /* word sub/cmp, then NZ --> test dst!=src */
1536 return unop(Iop_1Uto64,
1537 binop(Iop_CmpNE16,
1538 unop(Iop_64to16,cc_dep1),
1539 unop(Iop_64to16,cc_dep2)));
1542 /* 6, */
1543 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondBE)) {
1544 /* word sub/cmp, then BE (unsigned less than or equal)
1545 --> test dst <=u src */
1546 return unop(Iop_1Uto64,
1547 binop(Iop_CmpLE64U,
1548 binop(Iop_Shl64, cc_dep1, mkU8(48)),
1549 binop(Iop_Shl64, cc_dep2, mkU8(48))));
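      /* Shifting both 64-bit args left by 48 places the 16-bit operands in
         the top bits and discards everything else, so a plain 64-bit
         comparison (unsigned here, signed in the LE case further below)
         decides the 16-bit comparison correctly. */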
1552 /* 8, 9 */
1553 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondS)
1554 && isU64(cc_dep2, 0)) {
1555 /* word sub/cmp of zero, then S --> test (dst-0 <s 0)
1556 --> test dst <s 0
1557 --> (ULong)dst[15]
1558 This is yet another scheme by which clang figures out if the
1559 top bit of a word is 1 or 0. See also LOGICB/CondS below. */
1560 /* Note: isU64(cc_dep2, 0) is correct, even though this is
1561 for a 16-bit comparison, since the args to the helper
1562 function are always U64s. */
1563 return binop(Iop_And64,
1564 binop(Iop_Shr64,cc_dep1,mkU8(15)),
1565 mkU64(1));
1567 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNS)
1568 && isU64(cc_dep2, 0)) {
1569 /* word sub/cmp of zero, then NS --> test !(dst-0 <s 0)
1570 --> test !(dst <s 0)
1571 --> (ULong) !dst[15]
1573 return binop(Iop_Xor64,
1574 binop(Iop_And64,
1575 binop(Iop_Shr64,cc_dep1,mkU8(15)),
1576 mkU64(1)),
1577 mkU64(1));
1580 /* 14, */
1581 if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
1582 /* word sub/cmp, then LE (signed less than or equal)
1583 --> test dst <=s src */
1584 return unop(Iop_1Uto64,
1585 binop(Iop_CmpLE64S,
1586 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1587 binop(Iop_Shl64,cc_dep2,mkU8(48))));
1591 /*---------------- SUBB ----------------*/
1593 /* 2, 3 */
1594 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondB)) {
1595 /* byte sub/cmp, then B (unsigned less than)
1596 --> test dst <u src */
1597 return unop(Iop_1Uto64,
1598 binop(Iop_CmpLT64U,
1599 binop(Iop_And64, cc_dep1, mkU64(0xFF)),
1600 binop(Iop_And64, cc_dep2, mkU64(0xFF))));
1602 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNB)) {
1603 /* byte sub/cmp, then NB (unsigned greater than or equal)
1604 --> test src <=u dst */
1605 /* Note, args are opposite way round from the usual */
1606 return unop(Iop_1Uto64,
1607 binop(Iop_CmpLE64U,
1608 binop(Iop_And64, cc_dep2, mkU64(0xFF)),
1609 binop(Iop_And64, cc_dep1, mkU64(0xFF))));
1612 /* 4, 5 */
1613 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
1614 /* byte sub/cmp, then Z --> test dst==src */
1615 return unop(Iop_1Uto64,
1616 binop(Iop_CmpEQ8,
1617 unop(Iop_64to8,cc_dep1),
1618 unop(Iop_64to8,cc_dep2)));
1620 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
1621 /* byte sub/cmp, then NZ --> test dst!=src */
1622 return unop(Iop_1Uto64,
1623 binop(Iop_CmpNE8,
1624 unop(Iop_64to8,cc_dep1),
1625 unop(Iop_64to8,cc_dep2)));
1628 /* 6, */
1629 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) {
1630 /* byte sub/cmp, then BE (unsigned less than or equal)
1631 --> test dst <=u src */
1632 return unop(Iop_1Uto64,
1633 binop(Iop_CmpLE64U,
1634 binop(Iop_And64, cc_dep1, mkU64(0xFF)),
1635 binop(Iop_And64, cc_dep2, mkU64(0xFF))));
1638 /* 8, 9 */
1639 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
1640 && isU64(cc_dep2, 0)) {
1641 /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
1642 --> test dst <s 0
1643 --> (ULong)dst[7]
1644 This is yet another scheme by which gcc figures out if the
1645 top bit of a byte is 1 or 0. See also LOGICB/CondS below. */
1646 /* Note: isU64(cc_dep2, 0) is correct, even though this is
1647 for an 8-bit comparison, since the args to the helper
1648 function are always U64s. */
1649 return binop(Iop_And64,
1650 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1651 mkU64(1));
1653 if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
1654 && isU64(cc_dep2, 0)) {
1655 /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
1656 --> test !(dst <s 0)
1657 --> (ULong) !dst[7]
1659 return binop(Iop_Xor64,
1660 binop(Iop_And64,
1661 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1662 mkU64(1)),
1663 mkU64(1));
1666 /*---------------- LOGICQ ----------------*/
1668 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
1669 /* long long and/or/xor, then Z --> test dst==0 */
1670 return unop(Iop_1Uto64,
1671 binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1673 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) {
1674 /* long long and/or/xor, then NZ --> test dst!=0 */
1675 return unop(Iop_1Uto64,
1676 binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1679 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
1680 /* long long and/or/xor, then L
1681 LOGIC sets SF and ZF according to the
1682 result and makes OF be zero. L computes SF ^ OF, but
1683 OF is zero, so this reduces to SF -- which will be 1 iff
1684 the result is < signed 0. Hence ...
1686 return unop(Iop_1Uto64,
1687 binop(Iop_CmpLT64S,
1688 cc_dep1,
1689 mkU64(0)));
1692 // Verified
1693 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondS)) {
1694 /* long long and/or/xor, then S --> (ULong)result[63] */
1695 return binop(Iop_Shr64, cc_dep1, mkU8(63));
1697 // Verified
1698 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNS)) {
1699 /* long long and/or/xor, then NS --> (ULong) ~ result[63] */
1700 return binop(Iop_Xor64,
1701 binop(Iop_Shr64, cc_dep1, mkU8(63)),
1702 mkU64(1));
1705 /*---------------- LOGICL ----------------*/
1707 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
1708 /* long and/or/xor, then Z --> test dst==0 */
1709 return unop(Iop_1Uto64,
1710 binop(Iop_CmpEQ32,
1711 unop(Iop_64to32, cc_dep1),
1712 mkU32(0)));
1714 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
1715 /* long and/or/xor, then NZ --> test dst!=0 */
1716 return unop(Iop_1Uto64,
1717 binop(Iop_CmpNE32,
1718 unop(Iop_64to32, cc_dep1),
1719 mkU32(0)));
1722 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
1723 /* long and/or/xor, then LE
1724 This is pretty subtle. LOGIC sets SF and ZF according to the
1725 result and makes OF be zero. LE computes (SF ^ OF) | ZF, but
1726 OF is zero, so this reduces to SF | ZF -- which will be 1 iff
1727 the result is <=signed 0. Hence ...
1729 return unop(Iop_1Uto64,
1730 binop(Iop_CmpLE32S,
1731 unop(Iop_64to32, cc_dep1),
1732 mkU32(0)));
1735 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) {
1736 /* long and/or/xor, then S --> (ULong)result[31] */
1737 return binop(Iop_And64,
1738 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1739 mkU64(1));
1741 if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) {
1742 /* long and/or/xor, then NS --> (ULong) ~ result[31] */
1743 return binop(Iop_Xor64,
1744 binop(Iop_And64,
1745 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1746 mkU64(1)),
1747 mkU64(1));
1750 /*---------------- LOGICW ----------------*/
1752 if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondZ)) {
1753 /* word and/or/xor, then Z --> test dst==0 */
1754 // Use CmpEQ32 rather than CmpEQ64 here, so that Memcheck instruments
1755 // it exactly at EdcAUTO.
1756 return unop(Iop_1Uto64,
1757 binop(Iop_CmpEQ32,
1758 unop(Iop_16Uto32, unop(Iop_64to16, cc_dep1)),
1759 mkU32(0)));
1761 if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondNZ)) {
1762 /* word and/or/xor, then NZ --> test dst!=0 */
1763 // Use CmpNE32 rather than CmpNE64 here, so that Memcheck instruments
1764 // it exactly at EdcAUTO.
1765 return unop(Iop_1Uto64,
1766 binop(Iop_CmpNE32,
1767 unop(Iop_16Uto32, unop(Iop_64to16, cc_dep1)),
1768 mkU32(0)));
1771 if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondS)) {
1772 /* word and/or/xor, then S --> (ULong)result[15] */
1773 return binop(Iop_And64,
1774 binop(Iop_Shr64, cc_dep1, mkU8(15)),
1775 mkU64(1));
1777 if (isU64(cc_op, AMD64G_CC_OP_LOGICW) && isU64(cond, AMD64CondNS)) {
1778 /* word and/or/xor, then NS --> (ULong) ~ result[15] */
1779 return binop(Iop_Xor64,
1780 binop(Iop_And64,
1781 binop(Iop_Shr64, cc_dep1, mkU8(15)),
1782 mkU64(1)),
1783 mkU64(1));
1786 /*---------------- LOGICB ----------------*/
1788 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
1789 /* byte and/or/xor, then Z --> test dst==0 */
1790 // Use CmpEQ32 rather than CmpEQ64 here, so that Memcheck instruments
1791 // it exactly at EdcAUTO.
1792 return unop(Iop_1Uto64,
1793 binop(Iop_CmpEQ32,
1794 unop(Iop_8Uto32, unop(Iop_64to8, cc_dep1)),
1795 mkU32(0)));
1797 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) {
1798 /* byte and/or/xor, then NZ --> test dst!=0 */
1799 // Use CmpNE32 rather than CmpNE64 here, so that Memcheck instruments
1800 // it exactly at EdcAUTO.
1801 return unop(Iop_1Uto64,
1802 binop(Iop_CmpNE32,
1803 unop(Iop_8Uto32, unop(Iop_64to8, cc_dep1)),
1804 mkU32(0)));
1807 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
1808 /* this is an idiom gcc sometimes uses to find out if the top
1809 bit of a byte register is set: eg testb %al,%al; js ..
1810 Since it just depends on the top bit of the byte, extract
1811 that bit and explicitly get rid of all the rest. This
1812 helps memcheck avoid false positives in the case where any
1813 of the other bits in the byte are undefined. */
1814 /* byte and/or/xor, then S --> (UInt)result[7] */
1815 return binop(Iop_And64,
1816 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1817 mkU64(1));
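      /* For example, "testb %al,%al ; js <target>" produces the thunk
         (LOGICB, result-of-AND, 0), and the S condition then folds to
         (cc_dep1 >> 7) & 1 -- a single-bit extract which ignores bits
         8..63 of the thunk value entirely. */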
1819 if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) {
1820 /* byte and/or/xor, then NS --> (UInt)!result[7] */
1821 return binop(Iop_Xor64,
1822 binop(Iop_And64,
1823 binop(Iop_Shr64,cc_dep1,mkU8(7)),
1824 mkU64(1)),
1825 mkU64(1));
1828 /*---------------- INCB ----------------*/
1830 if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
1831 /* 8-bit inc, then LE --> sign bit of the arg */
1832 return binop(Iop_And64,
1833 binop(Iop_Shr64,
1834 binop(Iop_Sub64, cc_dep1, mkU64(1)),
1835 mkU8(7)),
1836 mkU64(1));
1839 /*---------------- INCW ----------------*/
1841 if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
1842 /* 16-bit inc, then Z --> test dst == 0 */
1843 return unop(Iop_1Uto64,
1844 binop(Iop_CmpEQ64,
1845 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1846 mkU64(0)));
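      /* The Shl64 by 48 discards bits 16..63 of the thunk value, so the
         CmpEQ64 against zero tests only the low 16 bits of the result,
         which is exactly the 16-bit Z condition.  The DECW case below
         uses the same trick. */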
1849 /*---------------- DECL ----------------*/
1851 if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
1852 /* dec L, then Z --> test dst == 0 */
1853 return unop(Iop_1Uto64,
1854 binop(Iop_CmpEQ32,
1855 unop(Iop_64to32, cc_dep1),
1856 mkU32(0)));
1859 /*---------------- DECW ----------------*/
1861 if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
1862 /* 16-bit dec, then NZ --> test dst != 0 */
1863 return unop(Iop_1Uto64,
1864 binop(Iop_CmpNE64,
1865 binop(Iop_Shl64,cc_dep1,mkU8(48)),
1866 mkU64(0)));
1869 /*---------------- SHRQ ----------------*/
1871 if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondZ)) {
1872 /* SHRQ, then Z --> test result[63:0] == 0 */
1873 return unop(Iop_1Uto64,
1874 binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1876 if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondNZ)) {
1877 /* SHRQ, then NZ --> test result[63:0] != 0 */
1878 return unop(Iop_1Uto64,
1879 binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1882 if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondS)) {
1883 /* SHRQ, then S --> (ULong)result[63] (result is in dep1) */
1884 return binop(Iop_Shr64, cc_dep1, mkU8(63));
1886 // No known test case for this, hence disabled:
1887 //if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondNS)) {
1888 // /* SHRQ, then NS --> (ULong) ~ result[63] */
1889 // vassert(0);
1892 /*---------------- SHRL ----------------*/
1894 if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondZ)) {
1895 /* SHRL, then Z --> test dep1 == 0 */
1896 return unop(Iop_1Uto64,
1897 binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1),
1898 mkU32(0)));
1900 if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondNZ)) {
1901 /* SHRL, then NZ --> test dep1 != 0 */
1902 return unop(Iop_1Uto64,
1903 binop(Iop_CmpNE32, unop(Iop_64to32, cc_dep1),
1904 mkU32(0)));
1907 if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondS)) {
1908 /* SHRL/SARL, then S --> (ULong)result[31] */
1909 return binop(Iop_And64,
1910 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1911 mkU64(1));
1913 if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondNS)) {
1914 /* SHRL/SARL, then NS --> (ULong) ~ result[31] */
1915 return binop(Iop_Xor64,
1916 binop(Iop_And64,
1917 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1918 mkU64(1)),
1919 mkU64(1));
1922 /*---------------- SHRW ----------------*/
1924 if (isU64(cc_op, AMD64G_CC_OP_SHRW) && isU64(cond, AMD64CondZ)) {
1925 /* SHRW, then Z --> test dep1 == 0 */
1926 return unop(Iop_1Uto64,
1927 binop(Iop_CmpEQ32,
1928 unop(Iop_16Uto32, unop(Iop_64to16, cc_dep1)),
1929 mkU32(0)));
1931 // No known test case for this, hence disabled:
1932 //if (isU64(cc_op, AMD64G_CC_OP_SHRW) && isU64(cond, AMD64CondNZ)) {
   //   /* SHRW, then NZ --> test dep1 != 0 */
1934 // return unop(Iop_1Uto64,
1935 // binop(Iop_CmpNE32,
1936 // unop(Iop_16Uto32, unop(Iop_64to16, cc_dep1)),
1937 // mkU32(0)));
1940 /*---------------- SHLQ ----------------*/
1942 if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondZ)) {
1943 /* SHLQ, then Z --> test dep1 == 0 */
1944 return unop(Iop_1Uto64,
1945 binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1947 if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondNZ)) {
1948 /* SHLQ, then NZ --> test dep1 != 0 */
1949 return unop(Iop_1Uto64,
1950 binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1953 // Verified
1954 if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondS)) {
1955 /* SHLQ, then S --> (ULong)result[63] */
1956 return binop(Iop_Shr64, cc_dep1, mkU8(63));
1958 // No known test case
1959 //if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondNS)) {
1960 // /* SHLQ, then NS --> (ULong) ~ result[63] */
1961 // vassert(0);
1964 /*---------------- SHLL ----------------*/
1966 if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondZ)) {
1967 /* SHLL, then Z --> test result[31:0] == 0 */
1968 return unop(Iop_1Uto64,
1969 binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1),
1970 mkU32(0)));
1972 // Verified
1973 if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondNZ)) {
1974 /* SHLL, then NZ --> test dep1 != 0 */
1975 return unop(Iop_1Uto64,
1976 binop(Iop_CmpNE32, unop(Iop_64to32, cc_dep1),
1977 mkU32(0)));
1980 if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondS)) {
1981 /* SHLL, then S --> (ULong)result[31] */
1982 return binop(Iop_And64,
1983 binop(Iop_Shr64, cc_dep1, mkU8(31)),
1984 mkU64(1));
1986 // No known test case
1987 //if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondNS)) {
1988 // /* SHLL, then NS --> (ULong) ~ result[31] */
1989 // vassert(0);
1992 /*---------------- COPY ----------------*/
1993 /* This can happen, as a result of amd64 FP compares: "comisd ... ;
1994 jbe" for example. */
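   /* For the COPY thunk, cc_dep1 holds the flag bits at their
      AMD64G_CC_SHIFT_* positions, so each of the conditions handled
      below reduces to a shift by the relevant shift constant followed
      by a mask with 1. */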
1996 if (isU64(cc_op, AMD64G_CC_OP_COPY)
1997 && (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
1998 /* COPY, then BE --> extract C and Z from dep1, and test (C
1999 or Z == 1). */
2000 /* COPY, then NBE --> extract C and Z from dep1, and test (C
2001 or Z == 0). */
2002 ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
2003 return
2004 unop(
2005 Iop_1Uto64,
2006 binop(
2007 Iop_CmpEQ64,
2008 binop(
2009 Iop_And64,
2010 binop(
2011 Iop_Or64,
2012 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
2013 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))
2015 mkU64(1)
2017 mkU64(nnn)
2022 if (isU64(cc_op, AMD64G_CC_OP_COPY)
2023 && (isU64(cond, AMD64CondB) || isU64(cond, AMD64CondNB))) {
2024 /* COPY, then B --> extract C from dep1, and test (C == 1). */
2025 /* COPY, then NB --> extract C from dep1, and test (C == 0). */
2026 ULong nnn = isU64(cond, AMD64CondB) ? 1 : 0;
2027 return
2028 unop(
2029 Iop_1Uto64,
2030 binop(
2031 Iop_CmpEQ64,
2032 binop(
2033 Iop_And64,
2034 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
2035 mkU64(1)
2037 mkU64(nnn)
2042 if (isU64(cc_op, AMD64G_CC_OP_COPY)
2043 && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
2044 /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
2045 /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
2046 ULong nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
2047 return
2048 unop(
2049 Iop_1Uto64,
2050 binop(
2051 Iop_CmpEQ64,
2052 binop(
2053 Iop_And64,
2054 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
2055 mkU64(1)
2057 mkU64(nnn)
2062 if (isU64(cc_op, AMD64G_CC_OP_COPY)
2063 && (isU64(cond, AMD64CondP) || isU64(cond, AMD64CondNP))) {
2064 /* COPY, then P --> extract P from dep1, and test (P == 1). */
2065 /* COPY, then NP --> extract P from dep1, and test (P == 0). */
2066 ULong nnn = isU64(cond, AMD64CondP) ? 1 : 0;
2067 return
2068 unop(
2069 Iop_1Uto64,
2070 binop(
2071 Iop_CmpEQ64,
2072 binop(
2073 Iop_And64,
2074 binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
2075 mkU64(1)
2077 mkU64(nnn)
2082 # if 0
2083 if (cond->tag == Iex_Const && cc_op->tag == Iex_Const) {
2084 vex_printf("spec request failed: ");
2085 vex_printf(" %s ", function_name);
2086 for (i = 0; i < 2/*arity*/; i++) {
2087 vex_printf(" ");
2088 ppIRExpr(args[i]);
2090 vex_printf("\n");
2092 # endif
2094 return NULL;
2097 /* --------- specialising "amd64g_calculate_rflags_c" --------- */
2099 if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
2100 /* specialise calls to above "calculate_rflags_c" function */
2101 IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
2102 vassert(arity == 4);
2103 cc_op = args[0];
2104 cc_dep1 = args[1];
2105 cc_dep2 = args[2];
2106 cc_ndep = args[3];
2108 if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
2109 /* C after sub denotes unsigned less than */
2110 return unop(Iop_1Uto64,
2111 binop(Iop_CmpLT64U,
2112 cc_dep1,
2113 cc_dep2));
2115 if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
2116 /* C after sub denotes unsigned less than */
2117 return unop(Iop_1Uto64,
2118 binop(Iop_CmpLT32U,
2119 unop(Iop_64to32, cc_dep1),
2120 unop(Iop_64to32, cc_dep2)));
2122 if (isU64(cc_op, AMD64G_CC_OP_SUBW)) {
2123 /* C after sub denotes unsigned less than */
2124 return unop(Iop_1Uto64,
2125 binop(Iop_CmpLT64U,
2126 binop(Iop_And64,cc_dep1,mkU64(0xFFFF)),
2127 binop(Iop_And64,cc_dep2,mkU64(0xFFFF))));
2129 if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
2130 /* C after sub denotes unsigned less than */
2131 return unop(Iop_1Uto64,
2132 binop(Iop_CmpLT64U,
2133 binop(Iop_And64,cc_dep1,mkU64(0xFF)),
2134 binop(Iop_And64,cc_dep2,mkU64(0xFF))));
2136 if (isU64(cc_op, AMD64G_CC_OP_ADDQ)) {
2137 /* C after add denotes sum <u either arg */
2138 return unop(Iop_1Uto64,
2139 binop(Iop_CmpLT64U,
2140 binop(Iop_Add64, cc_dep1, cc_dep2),
2141 cc_dep1));
2143 if (isU64(cc_op, AMD64G_CC_OP_ADDL)) {
2144 /* C after add denotes sum <u either arg */
2145 return unop(Iop_1Uto64,
2146 binop(Iop_CmpLT32U,
2147 unop(Iop_64to32, binop(Iop_Add64, cc_dep1, cc_dep2)),
2148 unop(Iop_64to32, cc_dep1)));
2150 if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
2151 || isU64(cc_op, AMD64G_CC_OP_LOGICL)
2152 || isU64(cc_op, AMD64G_CC_OP_LOGICW)
2153 || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
2154 /* cflag after logic is zero */
2155 return mkU64(0);
2157 if (isU64(cc_op, AMD64G_CC_OP_DECL)
2158 || isU64(cc_op, AMD64G_CC_OP_INCL)
2159 || isU64(cc_op, AMD64G_CC_OP_DECQ)
2160 || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
2161 /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
2162 return cc_ndep;
2165 # if 0
2166 if (cc_op->tag == Iex_Const) {
2167 vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
2169 # endif
2171 # if 0
2172 if (cc_op->tag == Iex_Const) {
2173 vex_printf("spec request failed: ");
2174 vex_printf(" %s ", function_name);
2175 for (i = 0; i < 2/*arity*/; i++) {
2176 vex_printf(" ");
2177 ppIRExpr(args[i]);
2179 vex_printf("\n");
2181 # endif
2183 return NULL;
2186 # undef unop
2187 # undef binop
2188 # undef mkU64
2189 # undef mkU32
2190 # undef mkU8
2192 return NULL;
2196 /*---------------------------------------------------------------*/
2197 /*--- Supporting functions for x87 FPU activities. ---*/
2198 /*---------------------------------------------------------------*/
2200 static inline Bool host_is_little_endian ( void )
2202 UInt x = 0x76543210;
2203 UChar* p = (UChar*)(&x);
2204 return toBool(*p == 0x10);
2207 /* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
2208 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
2209 ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl )
2211 Bool mantissaIsZero;
2212 Int bexp;
2213 UChar sign;
2214 UChar* f64;
2216 vassert(host_is_little_endian());
2218 /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
2220 f64 = (UChar*)(&dbl);
2221 sign = toUChar( (f64[7] >> 7) & 1 );
2223 /* First off, if the tag indicates the register was empty,
2224 return 1,0,sign,1 */
2225 if (tag == 0) {
2226 /* vex_printf("Empty\n"); */
2227 return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1)
2228 | AMD64G_FC_MASK_C0;
2231 bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
2232 bexp &= 0x7FF;
2234 mantissaIsZero
2235 = toBool(
2236 (f64[6] & 0x0F) == 0
2237 && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
2240 /* If both exponent and mantissa are zero, the value is zero.
2241 Return 1,0,sign,0. */
2242 if (bexp == 0 && mantissaIsZero) {
2243 /* vex_printf("Zero\n"); */
2244 return AMD64G_FC_MASK_C3 | 0
2245 | (sign << AMD64G_FC_SHIFT_C1) | 0;
2248 /* If exponent is zero but mantissa isn't, it's a denormal.
2249 Return 1,1,sign,0. */
2250 if (bexp == 0 && !mantissaIsZero) {
2251 /* vex_printf("Denormal\n"); */
2252 return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2
2253 | (sign << AMD64G_FC_SHIFT_C1) | 0;
2256 /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
2257 Return 0,1,sign,1. */
2258 if (bexp == 0x7FF && mantissaIsZero) {
2259 /* vex_printf("Inf\n"); */
2260 return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1)
2261 | AMD64G_FC_MASK_C0;
2264 /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
2265 Return 0,0,sign,1. */
2266 if (bexp == 0x7FF && !mantissaIsZero) {
2267 /* vex_printf("NaN\n"); */
2268 return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
2271 /* Uh, ok, we give up. It must be a normal finite number.
2272 Return 0,1,sign,0.
2274 /* vex_printf("normal\n"); */
2275 return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
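/* Summary of the (C3,C2,C1,C0) encodings produced above:
     Empty     1,0,sign,1
     Zero      1,0,sign,0
     Denormal  1,1,sign,0
     Infinity  0,1,sign,1
     NaN       0,0,sign,1
     Normal    0,1,sign,0
*/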
2279 /* This is used to implement both 'frstor' and 'fldenv'. The latter
2280 appears to differ from the former only in that the 8 FP registers
2281 themselves are not transferred into the guest state. */
2282 static
2283 VexEmNote do_put_x87 ( Bool moveRegs,
2284 /*IN*/Fpu_State* x87_state,
2285 /*OUT*/VexGuestAMD64State* vex_state )
2287 Int stno, preg;
2288 UInt tag;
2289 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2290 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2291 UInt ftop = (x87_state->env[FP_ENV_STAT] >> 11) & 7;
2292 UInt tagw = x87_state->env[FP_ENV_TAG];
2293 UInt fpucw = x87_state->env[FP_ENV_CTRL];
2294 UInt c3210 = x87_state->env[FP_ENV_STAT] & 0x4700;
2295 VexEmNote ew;
2296 UInt fpround;
2297 ULong pair;
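   /* The x87 tag word holds two bits per physical register; the only
      encoding that matters here is 3 (empty).  Everything else
      (valid, zero or special) is treated as "in use": it maps to the
      one-byte VEX tag value 1, while empty maps to 0. */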
2299 /* Copy registers and tags */
2300 for (stno = 0; stno < 8; stno++) {
2301 preg = (stno + ftop) & 7;
2302 tag = (tagw >> (2*preg)) & 3;
2303 if (tag == 3) {
2304 /* register is empty */
2305 /* hmm, if it's empty, does it still get written? Probably
2306 safer to say it does. If we don't, memcheck could get out
2307 of sync, in that it thinks all FP registers are defined by
2308 this helper, but in reality some have not been updated. */
2309 if (moveRegs)
2310 vexRegs[preg] = 0; /* IEEE754 64-bit zero */
2311 vexTags[preg] = 0;
2312 } else {
2313 /* register is non-empty */
2314 if (moveRegs)
2315 convert_f80le_to_f64le( &x87_state->reg[10*stno],
2316 (UChar*)&vexRegs[preg] );
2317 vexTags[preg] = 1;
2321 /* stack pointer */
2322 vex_state->guest_FTOP = ftop;
2324 /* status word */
2325 vex_state->guest_FC3210 = c3210;
2327 /* handle the control word, setting FPROUND and detecting any
2328 emulation warnings. */
2329 pair = amd64g_check_fldcw ( (ULong)fpucw );
2330 fpround = (UInt)pair & 0xFFFFFFFFULL;
2331 ew = (VexEmNote)(pair >> 32);
2333 vex_state->guest_FPROUND = fpround & 3;
2335 /* emulation warnings --> caller */
2336 return ew;
2340 /* Create an x87 FPU state from the guest state, as close as
2341 we can approximate it. */
2342 static
2343 void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
2344 /*OUT*/Fpu_State* x87_state )
2346 Int i, stno, preg;
2347 UInt tagw;
2348 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2349 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2350 UInt ftop = vex_state->guest_FTOP;
2351 UInt c3210 = vex_state->guest_FC3210;
2353 for (i = 0; i < 14; i++)
2354 x87_state->env[i] = 0;
2356 x87_state->env[1] = x87_state->env[3] = x87_state->env[5]
2357 = x87_state->env[13] = 0xFFFF;
2358 x87_state->env[FP_ENV_STAT]
2359 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
2360 x87_state->env[FP_ENV_CTRL]
2361 = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
2363 /* Dump the register stack in ST order. */
2364 tagw = 0;
2365 for (stno = 0; stno < 8; stno++) {
2366 preg = (stno + ftop) & 7;
2367 if (vexTags[preg] == 0) {
2368 /* register is empty */
2369 tagw |= (3 << (2*preg));
2370 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2371 &x87_state->reg[10*stno] );
2372 } else {
2373 /* register is full. */
2374 tagw |= (0 << (2*preg));
2375 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2376 &x87_state->reg[10*stno] );
2379 x87_state->env[FP_ENV_TAG] = toUShort(tagw);
2383 /*---------------------------------------------------------------*/
2384 /*--- Supporting functions for XSAVE/FXSAVE. ---*/
2385 /*---------------------------------------------------------------*/
2387 /* CALLED FROM GENERATED CODE */
2388 /* DIRTY HELPER (reads guest state, writes guest mem) */
2389 /* XSAVE component 0 is the x87 FPU state. */
2390 void amd64g_dirtyhelper_XSAVE_COMPONENT_0
2391 ( VexGuestAMD64State* gst, HWord addr )
2393 /* Derived from values obtained from
2394 vendor_id : AuthenticAMD
2395 cpu family : 15
2396 model : 12
2397 model name : AMD Athlon(tm) 64 Processor 3200+
2398 stepping : 0
2399 cpu MHz : 2200.000
2400 cache size : 512 KB
2402 /* Somewhat roundabout, but at least it's simple. */
2403 Fpu_State tmp;
2404 UShort* addrS = (UShort*)addr;
2405 UChar* addrC = (UChar*)addr;
2406 UShort fp_tags;
2407 UInt summary_tags;
2408 Int r, stno;
2409 UShort *srcS, *dstS;
2411 do_get_x87( gst, &tmp );
2413 /* Now build the proper fxsave x87 image from the fsave x87 image
2414 we just made. */
2416 addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */
2419 /* set addrS[2] in an endian-independent way */
2420 summary_tags = 0;
2421 fp_tags = tmp.env[FP_ENV_TAG];
2422 for (r = 0; r < 8; r++) {
2423 if ( ((fp_tags >> (2*r)) & 3) != 3 )
2424 summary_tags |= (1 << r);
2426 addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */
2427 addrC[5] = 0; /* pad */
2429 /* FOP: faulting fpu opcode. From experimentation, the real CPU
2430 does not write this field. (?!) */
2431 addrS[3] = 0; /* BOGUS */
2433 /* RIP (Last x87 instruction pointer). From experimentation, the
2434 real CPU does not write this field. (?!) */
2435 addrS[4] = 0; /* BOGUS */
2436 addrS[5] = 0; /* BOGUS */
2437 addrS[6] = 0; /* BOGUS */
2438 addrS[7] = 0; /* BOGUS */
2440 /* RDP (Last x87 data pointer). From experimentation, the real CPU
2441 does not write this field. (?!) */
2442 addrS[8] = 0; /* BOGUS */
2443 addrS[9] = 0; /* BOGUS */
2444 addrS[10] = 0; /* BOGUS */
2445 addrS[11] = 0; /* BOGUS */
2447 /* addrS[13,12] are MXCSR -- not written */
2448 /* addrS[15,14] are MXCSR_MASK -- not written */
2450 /* Copy in the FP registers, in ST order. */
2451 for (stno = 0; stno < 8; stno++) {
2452 srcS = (UShort*)(&tmp.reg[10*stno]);
2453 dstS = (UShort*)(&addrS[16 + 8*stno]);
2454 dstS[0] = srcS[0];
2455 dstS[1] = srcS[1];
2456 dstS[2] = srcS[2];
2457 dstS[3] = srcS[3];
2458 dstS[4] = srcS[4];
2459 dstS[5] = 0;
2460 dstS[6] = 0;
2461 dstS[7] = 0;
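   /* Each ST slot in the fxsave image is 16 bytes: the 10-byte 80-bit
      value in the low-order bytes, padded out with 6 bytes of zeroes. */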
2466 /* CALLED FROM GENERATED CODE */
2467 /* DIRTY HELPER (reads guest state, writes guest mem) */
2468 /* XSAVE component 1 is the SSE state. */
2469 void amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
2470 ( VexGuestAMD64State* gst, HWord addr )
2472 UShort* addrS = (UShort*)addr;
2473 UInt mxcsr;
2475 /* The only non-register parts of the SSE state are MXCSR and
2476 MXCSR_MASK. */
2477 mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );
2479 addrS[12] = toUShort(mxcsr); /* MXCSR */
2480 addrS[13] = toUShort(mxcsr >> 16);
2482 addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
2483 addrS[15] = 0x0000; /* MXCSR mask (hi16) */
2487 /* VISIBLE TO LIBVEX CLIENT */
2488 /* Do FXSAVE from the supplied VexGuestAMD64State structure and store
2489 the result at the given address which represents a buffer of at
2490 least 416 bytes.
2492 This function is not called from generated code. FXSAVE is dealt
2493 with by the amd64 front end by calling the XSAVE_COMPONENT_{0,1}
2494 functions above plus some in-line IR. This function is merely a
2495 convenience function for VEX's users.
2497 void LibVEX_GuestAMD64_fxsave ( /*IN*/VexGuestAMD64State* gst,
2498 /*OUT*/HWord fp_state )
2500 /* Do the x87 part */
2501 amd64g_dirtyhelper_XSAVE_COMPONENT_0(gst, fp_state);
2503 /* And now the SSE part, except for the registers themselves. */
2504 amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS(gst, fp_state);
2506 /* That's the first 160 bytes of the image done. */
2507 /* Now only %xmm0 .. %xmm15 remain to be copied. If the host is
2508 big-endian, these need to be byte-swapped. */
2509 U128 *xmm = (U128 *)(fp_state + 160);
2510 vassert(host_is_little_endian());
2512 # define COPY_U128(_dst,_src) \
2513 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
2514 _dst[2] = _src[2]; _dst[3] = _src[3]; } \
2515 while (0)
2517 COPY_U128( xmm[0], gst->guest_YMM0 );
2518 COPY_U128( xmm[1], gst->guest_YMM1 );
2519 COPY_U128( xmm[2], gst->guest_YMM2 );
2520 COPY_U128( xmm[3], gst->guest_YMM3 );
2521 COPY_U128( xmm[4], gst->guest_YMM4 );
2522 COPY_U128( xmm[5], gst->guest_YMM5 );
2523 COPY_U128( xmm[6], gst->guest_YMM6 );
2524 COPY_U128( xmm[7], gst->guest_YMM7 );
2525 COPY_U128( xmm[8], gst->guest_YMM8 );
2526 COPY_U128( xmm[9], gst->guest_YMM9 );
2527 COPY_U128( xmm[10], gst->guest_YMM10 );
2528 COPY_U128( xmm[11], gst->guest_YMM11 );
2529 COPY_U128( xmm[12], gst->guest_YMM12 );
2530 COPY_U128( xmm[13], gst->guest_YMM13 );
2531 COPY_U128( xmm[14], gst->guest_YMM14 );
2532 COPY_U128( xmm[15], gst->guest_YMM15 );
2533 # undef COPY_U128
2537 /*---------------------------------------------------------------*/
2538 /*--- Supporting functions for XRSTOR/FXRSTOR. ---*/
2539 /*---------------------------------------------------------------*/
2541 /* CALLED FROM GENERATED CODE */
2542 /* DIRTY HELPER (writes guest state, reads guest mem) */
2543 VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_0
2544 ( VexGuestAMD64State* gst, HWord addr )
2546 Fpu_State tmp;
2547 UShort* addrS = (UShort*)addr;
2548 UChar* addrC = (UChar*)addr;
2549 UShort fp_tags;
2550 Int r, stno, i;
2552 /* Copy the x87 registers out of the image, into a temporary
2553 Fpu_State struct. */
2554 for (i = 0; i < 14; i++) tmp.env[i] = 0;
2555 for (i = 0; i < 80; i++) tmp.reg[i] = 0;
2556 /* fill in tmp.reg[0..7] */
2557 for (stno = 0; stno < 8; stno++) {
2558 UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
2559 UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
2560 dstS[0] = srcS[0];
2561 dstS[1] = srcS[1];
2562 dstS[2] = srcS[2];
2563 dstS[3] = srcS[3];
2564 dstS[4] = srcS[4];
2566 /* fill in tmp.env[0..13] */
2567 tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */
2570 fp_tags = 0;
2571 for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
      else
         fp_tags |= (3 << (2*r)); /* EMPTY */
2577 tmp.env[FP_ENV_TAG] = fp_tags;
2579 /* Now write 'tmp' into the guest state. */
2580 VexEmNote warnX87 = do_put_x87( True/*moveRegs*/, &tmp, gst );
2582 return warnX87;
2586 /* CALLED FROM GENERATED CODE */
2587 /* DIRTY HELPER (writes guest state, reads guest mem) */
2588 VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
2589 ( VexGuestAMD64State* gst, HWord addr )
2591 UShort* addrS = (UShort*)addr;
2592 UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
2593 | ((((UInt)addrS[13]) & 0xFFFF) << 16);
2594 ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 );
2596 VexEmNote warnXMM = (VexEmNote)(w64 >> 32);
2598 gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
2599 return warnXMM;
2603 /* VISIBLE TO LIBVEX CLIENT */
/* Do FXRSTOR from the supplied address, loading the values read into
   the given VexGuestAMD64State structure.
2607 This function is not called from generated code. FXRSTOR is dealt
2608 with by the amd64 front end by calling the XRSTOR_COMPONENT_{0,1}
2609 functions above plus some in-line IR. This function is merely a
2610 convenience function for VEX's users.
2612 VexEmNote LibVEX_GuestAMD64_fxrstor ( /*IN*/HWord fp_state,
2613 /*MOD*/VexGuestAMD64State* gst )
2615 /* Restore %xmm0 .. %xmm15. If the host is big-endian, these need
2616 to be byte-swapped. */
2617 U128 *xmm = (U128 *)(fp_state + 160);
2619 vassert(host_is_little_endian());
2621 # define COPY_U128(_dst,_src) \
2622 do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
2623 _dst[2] = _src[2]; _dst[3] = _src[3]; } \
2624 while (0)
2626 COPY_U128( gst->guest_YMM0, xmm[0] );
2627 COPY_U128( gst->guest_YMM1, xmm[1] );
2628 COPY_U128( gst->guest_YMM2, xmm[2] );
2629 COPY_U128( gst->guest_YMM3, xmm[3] );
2630 COPY_U128( gst->guest_YMM4, xmm[4] );
2631 COPY_U128( gst->guest_YMM5, xmm[5] );
2632 COPY_U128( gst->guest_YMM6, xmm[6] );
2633 COPY_U128( gst->guest_YMM7, xmm[7] );
2634 COPY_U128( gst->guest_YMM8, xmm[8] );
2635 COPY_U128( gst->guest_YMM9, xmm[9] );
2636 COPY_U128( gst->guest_YMM10, xmm[10] );
2637 COPY_U128( gst->guest_YMM11, xmm[11] );
2638 COPY_U128( gst->guest_YMM12, xmm[12] );
2639 COPY_U128( gst->guest_YMM13, xmm[13] );
2640 COPY_U128( gst->guest_YMM14, xmm[14] );
2641 COPY_U128( gst->guest_YMM15, xmm[15] );
2643 # undef COPY_U128
2645 VexEmNote warnXMM
2646 = amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS(gst, fp_state);
2647 VexEmNote warnX87
2648 = amd64g_dirtyhelper_XRSTOR_COMPONENT_0(gst, fp_state);
2650 /* Prefer an X87 emwarn over an XMM one, if both exist. */
2651 if (warnX87 != EmNote_NONE)
2652 return warnX87;
2653 else
2654 return warnXMM;
2658 /*---------------------------------------------------------------*/
2659 /*--- Supporting functions for FSAVE/FRSTOR ---*/
2660 /*---------------------------------------------------------------*/
2662 /* DIRTY HELPER (writes guest state) */
2663 /* Initialise the x87 FPU state as per 'finit'. */
2664 void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
2666 Int i;
2667 gst->guest_FTOP = 0;
2668 for (i = 0; i < 8; i++) {
2669 gst->guest_FPTAG[i] = 0; /* empty */
2670 gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
2672 gst->guest_FPROUND = (ULong)Irrm_NEAREST;
2673 gst->guest_FC3210 = 0;
2677 /* CALLED FROM GENERATED CODE */
2678 /* DIRTY HELPER (reads guest memory) */
2679 ULong amd64g_dirtyhelper_loadF80le ( Addr addrU )
2681 ULong f64;
2682 convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
2683 return f64;
2686 /* CALLED FROM GENERATED CODE */
2687 /* DIRTY HELPER (writes guest memory) */
2688 void amd64g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
2690 convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
2694 /* CALLED FROM GENERATED CODE */
2695 /* CLEAN HELPER */
2696 /* mxcsr[15:0] contains a SSE native format MXCSR value.
2697 Extract from it the required SSEROUND value and any resulting
2698 emulation warning, and return (warn << 32) | sseround value.
2700 ULong amd64g_check_ldmxcsr ( ULong mxcsr )
2702 /* Decide on a rounding mode. mxcsr[14:13] holds it. */
2703 /* NOTE, encoded exactly as per enum IRRoundingMode. */
2704 ULong rmode = (mxcsr >> 13) & 3;
2706 /* Detect any required emulation warnings. */
2707 VexEmNote ew = EmNote_NONE;
2709 if ((mxcsr & 0x1F80) != 0x1F80) {
2710 /* unmasked exceptions! */
2711 ew = EmWarn_X86_sseExns;
2713 else
2714 if (mxcsr & (1<<15)) {
2715 /* FZ is set */
2716 ew = EmWarn_X86_fz;
2718 else
2719 if (mxcsr & (1<<6)) {
2720 /* DAZ is set */
2721 ew = EmWarn_X86_daz;
2724 return (((ULong)ew) << 32) | ((ULong)rmode);
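/* For example, the default MXCSR value 0x1F80 (all exceptions masked,
   round-to-nearest, FZ=0, DAZ=0) yields rmode 0 (Irrm_NEAREST) and
   EmNote_NONE, hence a return value of zero. */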
2728 /* CALLED FROM GENERATED CODE */
2729 /* CLEAN HELPER */
2730 /* Given sseround as an IRRoundingMode value, create a suitable SSE
2731 native format MXCSR value. */
2732 ULong amd64g_create_mxcsr ( ULong sseround )
2734 sseround &= 3;
2735 return 0x1F80 | (sseround << 13);
2739 /* CLEAN HELPER */
2740 /* fpucw[15:0] contains a x87 native format FPU control word.
2741 Extract from it the required FPROUND value and any resulting
2742 emulation warning, and return (warn << 32) | fpround value.
2744 ULong amd64g_check_fldcw ( ULong fpucw )
2746 /* Decide on a rounding mode. fpucw[11:10] holds it. */
2747 /* NOTE, encoded exactly as per enum IRRoundingMode. */
2748 ULong rmode = (fpucw >> 10) & 3;
2750 /* Detect any required emulation warnings. */
2751 VexEmNote ew = EmNote_NONE;
2753 if ((fpucw & 0x3F) != 0x3F) {
2754 /* unmasked exceptions! */
2755 ew = EmWarn_X86_x87exns;
2757 else
2758 if (((fpucw >> 8) & 3) != 3) {
2759 /* unsupported precision */
2760 ew = EmWarn_X86_x87precision;
2763 return (((ULong)ew) << 32) | ((ULong)rmode);
2767 /* CLEAN HELPER */
2768 /* Given fpround as an IRRoundingMode value, create a suitable x87
2769 native format FPU control word. */
2770 ULong amd64g_create_fpucw ( ULong fpround )
2772 fpround &= 3;
2773 return 0x037F | (fpround << 10);
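/* 0x037F is the control word established by FINIT: all six exception
   bits are masked (low bits 0x3F) and precision control is set to
   extended (bits 9:8 == 3), so feeding the result back through
   amd64g_check_fldcw recovers the same rounding mode with no
   emulation warning. */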
2777 /* This is used to implement 'fldenv'.
2778 Reads 28 bytes at x87_state[0 .. 27]. */
2779 /* CALLED FROM GENERATED CODE */
2780 /* DIRTY HELPER */
2781 VexEmNote amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state,
2782 /*IN*/HWord x87_state)
2784 return do_put_x87( False, (Fpu_State*)x87_state, vex_state );
2788 /* CALLED FROM GENERATED CODE */
2789 /* DIRTY HELPER */
2790 /* Create an x87 FPU env from the guest state, as close as we can
2791 approximate it. Writes 28 bytes at x87_state[0..27]. */
2792 void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state,
2793 /*OUT*/HWord x87_state )
2795 Int i, stno, preg;
2796 UInt tagw;
2797 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2798 Fpu_State* x87 = (Fpu_State*)x87_state;
2799 UInt ftop = vex_state->guest_FTOP;
2800 ULong c3210 = vex_state->guest_FC3210;
2802 for (i = 0; i < 14; i++)
2803 x87->env[i] = 0;
2805 x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
2806 x87->env[FP_ENV_STAT]
2807 = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) ));
2808 x87->env[FP_ENV_CTRL]
2809 = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) ));
2811 /* Compute the x87 tag word. */
2812 tagw = 0;
2813 for (stno = 0; stno < 8; stno++) {
2814 preg = (stno + ftop) & 7;
2815 if (vexTags[preg] == 0) {
2816 /* register is empty */
2817 tagw |= (3 << (2*preg));
2818 } else {
2819 /* register is full. */
2820 tagw |= (0 << (2*preg));
2823 x87->env[FP_ENV_TAG] = toUShort(tagw);
2825 /* We don't dump the x87 registers, tho. */
2829 /* This is used to implement 'fnsave'.
2830 Writes 108 bytes at x87_state[0 .. 107]. */
2831 /* CALLED FROM GENERATED CODE */
2832 /* DIRTY HELPER */
2833 void amd64g_dirtyhelper_FNSAVE ( /*IN*/VexGuestAMD64State* vex_state,
2834 /*OUT*/HWord x87_state)
2836 do_get_x87( vex_state, (Fpu_State*)x87_state );
2840 /* This is used to implement 'fnsaves'.
2841 Writes 94 bytes at x87_state[0 .. 93]. */
2842 /* CALLED FROM GENERATED CODE */
2843 /* DIRTY HELPER */
2844 void amd64g_dirtyhelper_FNSAVES ( /*IN*/VexGuestAMD64State* vex_state,
2845 /*OUT*/HWord x87_state)
2847 Int i, stno, preg;
2848 UInt tagw;
2849 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2850 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2851 Fpu_State_16* x87 = (Fpu_State_16*)x87_state;
2852 UInt ftop = vex_state->guest_FTOP;
2853 UInt c3210 = vex_state->guest_FC3210;
2855 for (i = 0; i < 7; i++)
2856 x87->env[i] = 0;
2858 x87->env[FPS_ENV_STAT]
2859 = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
2860 x87->env[FPS_ENV_CTRL]
2861 = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
2863 /* Dump the register stack in ST order. */
2864 tagw = 0;
2865 for (stno = 0; stno < 8; stno++) {
2866 preg = (stno + ftop) & 7;
2867 if (vexTags[preg] == 0) {
2868 /* register is empty */
2869 tagw |= (3 << (2*preg));
2870 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2871 &x87->reg[10*stno] );
2872 } else {
2873 /* register is full. */
2874 tagw |= (0 << (2*preg));
2875 convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
2876 &x87->reg[10*stno] );
2879 x87->env[FPS_ENV_TAG] = toUShort(tagw);
2883 /* This is used to implement 'frstor'.
2884 Reads 108 bytes at x87_state[0 .. 107]. */
2885 /* CALLED FROM GENERATED CODE */
2886 /* DIRTY HELPER */
2887 VexEmNote amd64g_dirtyhelper_FRSTOR ( /*OUT*/VexGuestAMD64State* vex_state,
2888 /*IN*/HWord x87_state)
2890 return do_put_x87( True, (Fpu_State*)x87_state, vex_state );
2894 /* This is used to implement 'frstors'.
2895 Reads 94 bytes at x87_state[0 .. 93]. */
2896 /* CALLED FROM GENERATED CODE */
2897 /* DIRTY HELPER */
2898 VexEmNote amd64g_dirtyhelper_FRSTORS ( /*OUT*/VexGuestAMD64State* vex_state,
2899 /*IN*/HWord x87_state)
2901 Int stno, preg;
2902 UInt tag;
2903 ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
2904 UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
2905 Fpu_State_16* x87 = (Fpu_State_16*)x87_state;
2906 UInt ftop = (x87->env[FPS_ENV_STAT] >> 11) & 7;
2907 UInt tagw = x87->env[FPS_ENV_TAG];
2908 UInt fpucw = x87->env[FPS_ENV_CTRL];
2909 UInt c3210 = x87->env[FPS_ENV_STAT] & 0x4700;
2910 VexEmNote ew;
2911 UInt fpround;
2912 ULong pair;
2914 /* Copy registers and tags */
2915 for (stno = 0; stno < 8; stno++) {
2916 preg = (stno + ftop) & 7;
2917 tag = (tagw >> (2*preg)) & 3;
2918 if (tag == 3) {
2919 /* register is empty */
2920 /* hmm, if it's empty, does it still get written? Probably
2921 safer to say it does. If we don't, memcheck could get out
2922 of sync, in that it thinks all FP registers are defined by
2923 this helper, but in reality some have not been updated. */
2924 vexRegs[preg] = 0; /* IEEE754 64-bit zero */
2925 vexTags[preg] = 0;
2926 } else {
2927 /* register is non-empty */
2928 convert_f80le_to_f64le( &x87->reg[10*stno],
2929 (UChar*)&vexRegs[preg] );
2930 vexTags[preg] = 1;
2934 /* stack pointer */
2935 vex_state->guest_FTOP = ftop;
2937 /* status word */
2938 vex_state->guest_FC3210 = c3210;
2940 /* handle the control word, setting FPROUND and detecting any
2941 emulation warnings. */
2942 pair = amd64g_check_fldcw ( (ULong)fpucw );
2943 fpround = (UInt)pair & 0xFFFFFFFFULL;
2944 ew = (VexEmNote)(pair >> 32);
2946 vex_state->guest_FPROUND = fpround & 3;
2948 /* emulation warnings --> caller */
2949 return ew;
2953 /*---------------------------------------------------------------*/
2954 /*--- CPUID helpers. ---*/
2955 /*---------------------------------------------------------------*/
2957 /* Claim to be the following CPU, which is probably representative of
2958 the lowliest (earliest) amd64 offerings. It can do neither sse3
2959 nor cx16.
2961 vendor_id : AuthenticAMD
2962 cpu family : 15
2963 model : 5
2964 model name : AMD Opteron (tm) Processor 848
2965 stepping : 10
2966 cpu MHz : 1797.682
2967 cache size : 1024 KB
2968 fpu : yes
2969 fpu_exception : yes
2970 cpuid level : 1
2971 wp : yes
2972 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
2973 mtrr pge mca cmov pat pse36 clflush mmx fxsr
2974 sse sse2 syscall nx mmxext lm 3dnowext 3dnow
2975 bogomips : 3600.62
2976 TLB size : 1088 4K pages
2977 clflush size : 64
2978 cache_alignment : 64
2979 address sizes : 40 bits physical, 48 bits virtual
2980 power management: ts fid vid ttp
2982 2012-Feb-21: don't claim 3dnow or 3dnowext, since in fact
2983 we don't support them. See #291568. 3dnow is 80000001.EDX.31
2984 and 3dnowext is 80000001.EDX.30.
2986 void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
2988 # define SET_ABCD(_a,_b,_c,_d) \
2989 do { st->guest_RAX = (ULong)(_a); \
2990 st->guest_RBX = (ULong)(_b); \
2991 st->guest_RCX = (ULong)(_c); \
2992 st->guest_RDX = (ULong)(_d); \
2993 } while (0)
2995 switch (0xFFFFFFFF & st->guest_RAX) {
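      /* Leaf 0 advertises a maximum basic leaf of 1 and returns the
         vendor string "AuthenticAMD" as four little-endian ASCII bytes
         in each of EBX ("Auth"), EDX ("enti") and ECX ("cAMD"). */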
2996 case 0x00000000:
2997 SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
2998 break;
2999 case 0x00000001:
3000 SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
3001 break;
3002 case 0x80000000:
3003 SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
3004 break;
3005 case 0x80000001:
3006 /* Don't claim to support 3dnow or 3dnowext. 0xe1d3fbff is
3007 the original it-is-supported value that the h/w provides.
3008 See #291568. */
3009 SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, /*0xe1d3fbff*/
3010 0x21d3fbff);
3011 break;
3012 case 0x80000002:
3013 SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
3014 break;
3015 case 0x80000003:
3016 SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
3017 break;
3018 case 0x80000004:
3019 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3020 break;
3021 case 0x80000005:
3022 SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
3023 break;
3024 case 0x80000006:
3025 SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
3026 break;
3027 case 0x80000007:
3028 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
3029 break;
3030 case 0x80000008:
3031 SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
3032 break;
3033 default:
3034 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3035 break;
3037 # undef SET_ABCD
3041 /* Claim to be the following CPU (2 x ...), which is sse3 and cx16
3042 capable.
3044 vendor_id : GenuineIntel
3045 cpu family : 6
3046 model : 15
3047 model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
3048 stepping : 6
3049 cpu MHz : 2394.000
3050 cache size : 4096 KB
3051 physical id : 0
3052 siblings : 2
3053 core id : 0
3054 cpu cores : 2
3055 fpu : yes
3056 fpu_exception : yes
3057 cpuid level : 10
3058 wp : yes
3059 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
3060 mtrr pge mca cmov pat pse36 clflush dts acpi
3061 mmx fxsr sse sse2 ss ht tm syscall nx lm
3062 constant_tsc pni monitor ds_cpl vmx est tm2
3063 cx16 xtpr lahf_lm
3064 bogomips : 4798.78
3065 clflush size : 64
3066 cache_alignment : 64
3067 address sizes : 36 bits physical, 48 bits virtual
3068 power management:
3070 void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st )
3072 # define SET_ABCD(_a,_b,_c,_d) \
3073 do { st->guest_RAX = (ULong)(_a); \
3074 st->guest_RBX = (ULong)(_b); \
3075 st->guest_RCX = (ULong)(_c); \
3076 st->guest_RDX = (ULong)(_d); \
3077 } while (0)
3079 switch (0xFFFFFFFF & st->guest_RAX) {
3080 case 0x00000000:
3081 SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
3082 break;
3083 case 0x00000001:
3084 SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
3085 break;
3086 case 0x00000002:
3087 SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
3088 break;
3089 case 0x00000003:
3090 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3091 break;
3092 case 0x00000004: {
3093 switch (0xFFFFFFFF & st->guest_RCX) {
3094 case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
3095 0x0000003f, 0x00000001); break;
3096 case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
3097 0x0000003f, 0x00000001); break;
3098 case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
3099 0x00000fff, 0x00000001); break;
3100 default: SET_ABCD(0x00000000, 0x00000000,
3101 0x00000000, 0x00000000); break;
3103 break;
3105 case 0x00000005:
3106 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
3107 break;
3108 case 0x00000006:
3109 SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
3110 break;
3111 case 0x00000007:
3112 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3113 break;
3114 case 0x00000008:
3115 SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
3116 break;
3117 case 0x00000009:
3118 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3119 break;
3120 case 0x0000000a:
3121 unhandled_eax_value:
3122 SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
3123 break;
3124 case 0x80000000:
3125 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3126 break;
3127 case 0x80000001:
3128 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
3129 break;
3130 case 0x80000002:
3131 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
3132 break;
3133 case 0x80000003:
3134 SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
3135 break;
3136 case 0x80000004:
3137 SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
3138 break;
3139 case 0x80000005:
3140 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3141 break;
3142 case 0x80000006:
3143 SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
3144 break;
3145 case 0x80000007:
3146 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3147 break;
3148 case 0x80000008:
3149 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
3150 break;
3151 default:
3152 goto unhandled_eax_value;
3154 # undef SET_ABCD
3158 /* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16
3159 capable.
3161 vendor_id : GenuineIntel
3162 cpu family : 6
3163 model : 37
3164 model name : Intel(R) Core(TM) i5 CPU 670 @ 3.47GHz
3165 stepping : 2
3166 cpu MHz : 3334.000
3167 cache size : 4096 KB
3168 physical id : 0
3169 siblings : 4
3170 core id : 0
3171 cpu cores : 2
3172 apicid : 0
3173 initial apicid : 0
3174 fpu : yes
3175 fpu_exception : yes
3176 cpuid level : 11
3177 wp : yes
3178 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
3179 mtrr pge mca cmov pat pse36 clflush dts acpi
3180 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
3181 lm constant_tsc arch_perfmon pebs bts rep_good
3182 xtopology nonstop_tsc aperfmperf pni pclmulqdq
3183 dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16
3184 xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida
3185 arat tpr_shadow vnmi flexpriority ept vpid
3186 bogomips : 6957.57
3187 clflush size : 64
3188 cache_alignment : 64
3189 address sizes : 36 bits physical, 48 bits virtual
3190 power management:
3192 void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st )
3194 # define SET_ABCD(_a,_b,_c,_d) \
3195 do { st->guest_RAX = (ULong)(_a); \
3196 st->guest_RBX = (ULong)(_b); \
3197 st->guest_RCX = (ULong)(_c); \
3198 st->guest_RDX = (ULong)(_d); \
3199 } while (0)
3201 UInt old_eax = (UInt)st->guest_RAX;
3202 UInt old_ecx = (UInt)st->guest_RCX;
3204 switch (old_eax) {
3205 case 0x00000000:
3206 SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69);
3207 break;
3208 case 0x00000001:
3209 SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff, 0xbfebfbff);
3210 break;
3211 case 0x00000002:
3212 SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c);
3213 break;
3214 case 0x00000003:
3215 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3216 break;
3217 case 0x00000004:
3218 switch (old_ecx) {
3219 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
3220 0x0000003f, 0x00000000); break;
3221 case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f,
3222 0x0000007f, 0x00000000); break;
3223 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
3224 0x000001ff, 0x00000000); break;
3225 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
3226 0x00000fff, 0x00000002); break;
3227 default: SET_ABCD(0x00000000, 0x00000000,
3228 0x00000000, 0x00000000); break;
3230 break;
3231 case 0x00000005:
3232 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
3233 break;
3234 case 0x00000006:
3235 SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000);
3236 break;
3237 case 0x00000007:
3238 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3239 break;
3240 case 0x00000008:
3241 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3242 break;
3243 case 0x00000009:
3244 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3245 break;
3246 case 0x0000000a:
3247 SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603);
3248 break;
3249 case 0x0000000b:
3250 switch (old_ecx) {
3251 case 0x00000000:
3252 SET_ABCD(0x00000001, 0x00000002,
3253 0x00000100, 0x00000000); break;
3254 case 0x00000001:
3255 SET_ABCD(0x00000004, 0x00000004,
3256 0x00000201, 0x00000000); break;
3257 default:
3258 SET_ABCD(0x00000000, 0x00000000,
3259 old_ecx, 0x00000000); break;
3261 break;
3262 case 0x0000000c:
3263 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
3264 break;
3265 case 0x0000000d:
3266 switch (old_ecx) {
3267 case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
3268 0x00000100, 0x00000000); break;
3269 case 0x00000001: SET_ABCD(0x00000004, 0x00000004,
3270 0x00000201, 0x00000000); break;
3271 default: SET_ABCD(0x00000000, 0x00000000,
3272 old_ecx, 0x00000000); break;
3274 break;
3275 case 0x80000000:
3276 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3277 break;
3278 case 0x80000001:
3279 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
3280 break;
3281 case 0x80000002:
3282 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
3283 break;
3284 case 0x80000003:
3285 SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020);
3286 break;
3287 case 0x80000004:
3288 SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847);
3289 break;
3290 case 0x80000005:
3291 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3292 break;
3293 case 0x80000006:
3294 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
3295 break;
3296 case 0x80000007:
3297 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
3298 break;
3299 case 0x80000008:
3300 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
3301 break;
3302 default:
3303 SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
3304 break;
3306 # undef SET_ABCD
3310 /* Claim to be the following CPU (4 x ...), which is AVX and cx16
3311 capable. Plus (kludge!) it "supports" HTM.
   Also with the following change: claim that XSaveOpt is not
   available, in that cpuid(eax=0xD,ecx=1).eax[0] returns 0, compared
   to 1 on the real CPU.  Consequently, programs that correctly observe
3316 these CPUID values should only try to use 3 of the 8 XSave-family
3317 instructions: XGETBV, XSAVE and XRSTOR. In particular this avoids
3318 having to implement the compacted or optimised save/restore
3319 variants.
3321 vendor_id : GenuineIntel
3322 cpu family : 6
3323 model : 42
3324 model name : Intel(R) Core(TM) i5-2300 CPU @ 2.80GHz
3325 stepping : 7
3326 cpu MHz : 1600.000
3327 cache size : 6144 KB
3328 physical id : 0
3329 siblings : 4
3330 core id : 3
3331 cpu cores : 4
3332 apicid : 6
3333 initial apicid : 6
3334 fpu : yes
3335 fpu_exception : yes
3336 cpuid level : 13
3337 wp : yes
3338 flags : fpu vme de pse tsc msr pae mce cx8 apic sep
3339 mtrr pge mca cmov pat pse36 clflush dts acpi
3340 mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
3341 lm constant_tsc arch_perfmon pebs bts rep_good
3342 nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq
3343 dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16
3344 xtpr pdcm sse4_1 sse4_2 popcnt aes xsave avx
3345 lahf_lm ida arat epb xsaveopt pln pts dts
3346 tpr_shadow vnmi flexpriority ept vpid
3348 bogomips : 5768.94
3349 clflush size : 64
3350 cache_alignment : 64
3351 address sizes : 36 bits physical, 48 bits virtual
3352 power management:
3354 void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st,
3355 ULong hasF16C, ULong hasRDRAND,
3356 ULong hasRDSEED )
3358 vassert((hasF16C >> 1) == 0ULL);
3359 vassert((hasRDRAND >> 1) == 0ULL);
3360 # define SET_ABCD(_a,_b,_c,_d) \
3361 do { st->guest_RAX = (ULong)(_a); \
3362 st->guest_RBX = (ULong)(_b); \
3363 st->guest_RCX = (ULong)(_c); \
3364 st->guest_RDX = (ULong)(_d); \
3365 } while (0)
3367 UInt old_eax = (UInt)st->guest_RAX;
3368 UInt old_ecx = (UInt)st->guest_RCX;
3370 switch (old_eax) {
3371 case 0x00000000:
3372 SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
3373 break;
3374 case 0x00000001: {
3375 // As a baseline, advertise neither F16C (ecx:29) nor RDRAND (ecx:30),
3376 // but patch in support for them as directed by the caller.
3377 UInt ecx_extra
3378 = (hasF16C ? (1U << 29) : 0) | (hasRDRAND ? (1U << 30) : 0);
3379 SET_ABCD(0x000206a7, 0x00100800, (0x1f9ae3bf | ecx_extra), 0xbfebfbff);
3380 break;
3382 case 0x00000002:
3383 SET_ABCD(0x76035a01, 0x00f0b0ff, 0x00000000, 0x00ca0000);
3384 break;
3385 case 0x00000003:
3386 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3387 break;
3388 case 0x00000004:
3389 switch (old_ecx) {
3390 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
3391 0x0000003f, 0x00000000); break;
3392 case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f,
3393 0x0000003f, 0x00000000); break;
3394 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
3395 0x000001ff, 0x00000000); break;
3396 case 0x00000003: SET_ABCD(0x1c03c163, 0x02c0003f,
3397 0x00001fff, 0x00000006); break;
3398 default: SET_ABCD(0x00000000, 0x00000000,
3399 0x00000000, 0x00000000); break;
3401 break;
3402 case 0x00000005:
3403 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
3404 break;
3405 case 0x00000006:
3406 SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000);
3407 break;
3408 case 0x00000007: {
3409 UInt ebx_extra = 0;
3410 if (old_ecx == 0)
3411 ebx_extra = hasRDSEED ? (1U << 18) : 0;
3412 SET_ABCD(0x00000000, 0x00000800 | ebx_extra, 0x00000000,
3413 0x00000000);
3414 break;
3416 case 0x00000008:
3417 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3418 break;
3419 case 0x00000009:
3420 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3421 break;
3422 case 0x0000000a:
3423 SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603);
3424 break;
3425 case 0x0000000b:
3426 switch (old_ecx) {
3427 case 0x00000000:
3428 SET_ABCD(0x00000001, 0x00000001,
3429 0x00000100, 0x00000000); break;
3430 case 0x00000001:
3431 SET_ABCD(0x00000004, 0x00000004,
3432 0x00000201, 0x00000000); break;
3433 default:
3434 SET_ABCD(0x00000000, 0x00000000,
3435 old_ecx, 0x00000000); break;
3437 break;
3438 case 0x0000000c:
3439 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3440 break;
3441 case 0x0000000d:
3442 switch (old_ecx) {
3443 case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
3444 0x00000340, 0x00000000); break;
3445 case 0x00000001: SET_ABCD(0x00000000, 0x00000000,
3446 0x00000000, 0x00000000); break;
3447 case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
3448 0x00000000, 0x00000000); break;
3449 default: SET_ABCD(0x00000000, 0x00000000,
3450 0x00000000, 0x00000000); break;
3452 break;
3453 case 0x0000000e:
3454 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3455 break;
3456 case 0x0000000f:
3457 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3458 break;
3459 case 0x80000000:
3460 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3461 break;
3462 case 0x80000001:
3463 SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
3464 break;
3465 case 0x80000002:
3466 SET_ABCD(0x20202020, 0x20202020, 0x65746e49, 0x2952286c);
3467 break;
3468 case 0x80000003:
3469 SET_ABCD(0x726f4320, 0x4d542865, 0x35692029, 0x3033322d);
3470 break;
3471 case 0x80000004:
3472 SET_ABCD(0x50432030, 0x20402055, 0x30382e32, 0x007a4847);
3473 break;
3474 case 0x80000005:
3475 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3476 break;
3477 case 0x80000006:
3478 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
3479 break;
3480 case 0x80000007:
3481 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
3482 break;
3483 case 0x80000008:
3484 SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
3485 break;
3486 default:
3487 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3488 break;
3490 # undef SET_ABCD
3494 /* Claim to be the following CPU (4 x ...), which is AVX2 capable.
   With the following change: claim that XSaveOpt is not available, in
   that cpuid(eax=0xD,ecx=1).eax[0] returns 0, compared to 1 on the real
   CPU.  Consequently, programs that correctly observe these CPUID
3499 values should only try to use 3 of the 8 XSave-family instructions:
3500 XGETBV, XSAVE and XRSTOR. In particular this avoids having to
3501 implement the compacted or optimised save/restore variants.
3503 vendor_id : GenuineIntel
3504 cpu family : 6
3505 model : 60
3506 model name : Intel(R) Core(TM) i7-4910MQ CPU @ 2.90GHz
3507 stepping : 3
3508 microcode : 0x1c
3509 cpu MHz : 919.957
3510 cache size : 8192 KB
3511 physical id : 0
3512 siblings : 4
3513 core id : 3
3514 cpu cores : 4
3515 apicid : 6
3516 initial apicid : 6
3517 fpu : yes
3518 fpu_exception : yes
3519 cpuid level : 13
3520 wp : yes
3521 flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca
3522 cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht
3523 tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc
3524 arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc
3525 aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl
3526 vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1
3527 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave
3528 avx f16c rdrand lahf_lm abm ida arat epb pln pts dtherm
3529 tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust
3530 bmi1 avx2 smep bmi2 erms invpcid xsaveopt
3531 bugs :
3532 bogomips : 5786.68
3533 clflush size : 64
3534 cache_alignment : 64
3535 address sizes : 39 bits physical, 48 bits virtual
3536 power management:
3538 void amd64g_dirtyhelper_CPUID_avx2 ( VexGuestAMD64State* st,
3539 ULong hasF16C, ULong hasRDRAND,
3540 ULong hasRDSEED )
3542 vassert((hasF16C >> 1) == 0ULL);
3543 vassert((hasRDRAND >> 1) == 0ULL);
3544 # define SET_ABCD(_a,_b,_c,_d) \
3545 do { st->guest_RAX = (ULong)(_a); \
3546 st->guest_RBX = (ULong)(_b); \
3547 st->guest_RCX = (ULong)(_c); \
3548 st->guest_RDX = (ULong)(_d); \
3549 } while (0)
3551 UInt old_eax = (UInt)st->guest_RAX;
3552 UInt old_ecx = (UInt)st->guest_RCX;
3554 switch (old_eax) {
3555 case 0x00000000:
3556 SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
3557 break;
3558 case 0x00000001: {
3559 // As a baseline, advertise neither F16C (ecx:29) nor RDRAND (ecx:30),
3560 // but patch in support for them as directed by the caller.
3561 UInt ecx_extra
3562 = (hasF16C ? (1U << 29) : 0) | (hasRDRAND ? (1U << 30) : 0);
3563 SET_ABCD(0x000306c3, 0x02100800, (0x1ffafbff | ecx_extra), 0xbfebfbff);
3564 break;
3566 case 0x00000002:
3567 SET_ABCD(0x76036301, 0x00f0b6ff, 0x00000000, 0x00c10000);
3568 break;
3569 case 0x00000003:
3570 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3571 break;
3572 case 0x00000004:
3573 switch (old_ecx) {
3574 case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
3575 0x0000003f, 0x00000000); break;
3576 case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f,
3577 0x0000003f, 0x00000000); break;
3578 case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
3579 0x000001ff, 0x00000000); break;
3580 case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
3581 0x00001fff, 0x00000006); break;
3582 default: SET_ABCD(0x00000000, 0x00000000,
3583 0x00000000, 0x00000000); break;
3585 break;
3586 case 0x00000005:
3587 SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00042120);
3588 break;
3589 case 0x00000006:
3590 SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000);
3591 break;
3592 case 0x00000007:
3593 switch (old_ecx) {
3594 /* Don't advertise FSGSBASE support, bit 0 in EBX. */
3596 case 0x00000000: {
3597 UInt ebx_extra = hasRDSEED ? (1U << 18) : 0;
3598 SET_ABCD(0x00000000, 0x000027aa | ebx_extra,
3599 0x00000000, 0x00000000); break;
3601 default: SET_ABCD(0x00000000, 0x00000000,
3602 0x00000000, 0x00000000); break;
3604 break;
3605 case 0x00000008:
3606 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3607 break;
3608 case 0x00000009:
3609 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3610 break;
3611 case 0x0000000a:
3612 SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603);
3613 break;
3614 case 0x0000000b:
3615 switch (old_ecx) {
3616 case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
3617 0x00000100, 0x00000002); break;
3618 case 0x00000001: SET_ABCD(0x00000004, 0x00000008,
3619 0x00000201, 0x00000002); break;
3620 default: SET_ABCD(0x00000000, 0x00000000,
3621 old_ecx, 0x00000002); break;
3623 break;
3624 case 0x0000000c:
3625 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3626 break;
3627 case 0x0000000d:
3628 switch (old_ecx) {
3629 case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
3630 0x00000340, 0x00000000); break;
3631 case 0x00000001: SET_ABCD(0x00000000, 0x00000000,
3632 0x00000000, 0x00000000); break;
3633 case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
3634 0x00000000, 0x00000000); break;
3635 default: SET_ABCD(0x00000000, 0x00000000,
3636 0x00000000, 0x00000000); break;
3638 break;
3639 case 0x80000000:
3640 SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
3641 break;
3642 case 0x80000001:
3643 SET_ABCD(0x00000000, 0x00000000, 0x00000021, 0x2c100800);
3644 break;
3645 case 0x80000002:
3646 SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
3647 break;
3648 case 0x80000003:
3649 SET_ABCD(0x37692029, 0x3139342d, 0x20514d30, 0x20555043);
3650 break;
3651 case 0x80000004:
3652 SET_ABCD(0x2e322040, 0x48473039, 0x0000007a, 0x00000000);
3653 break;
3654 case 0x80000005:
3655 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
3656 break;
3657 case 0x80000006:
3658 SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
3659 break;
3660 case 0x80000007:
3661 SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
3662 break;
3663 case 0x80000008:
3664 SET_ABCD(0x00003027, 0x00000000, 0x00000000, 0x00000000);
3665 break;
3666 default:
3667 SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
3668 break;
3670 # undef SET_ABCD
3674 /*---------------------------------------------------------------*/
3675 /*--- Misc integer helpers, including rotates and crypto. ---*/
3676 /*---------------------------------------------------------------*/
3678 ULong amd64g_calculate_RCR ( ULong arg,
3679 ULong rot_amt,
3680 ULong rflags_in,
3681 Long szIN )
3683 Bool wantRflags = toBool(szIN < 0);
3684 ULong sz = wantRflags ? (-szIN) : szIN;
3685 ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F);
3686 ULong cf=0, of=0, tempcf;
3688 switch (sz) {
3689 case 8:
3690 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3691 of = ((arg >> 63) ^ cf) & 1;
3692 while (tempCOUNT > 0) {
3693 tempcf = arg & 1;
3694 arg = (arg >> 1) | (cf << 63);
3695 cf = tempcf;
3696 tempCOUNT--;
3698 break;
3699 case 4:
3700 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3701 of = ((arg >> 31) ^ cf) & 1;
3702 while (tempCOUNT > 0) {
3703 tempcf = arg & 1;
3704 arg = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31);
3705 cf = tempcf;
3706 tempCOUNT--;
3708 break;
3709 case 2:
3710 while (tempCOUNT >= 17) tempCOUNT -= 17;
3711 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3712 of = ((arg >> 15) ^ cf) & 1;
3713 while (tempCOUNT > 0) {
3714 tempcf = arg & 1;
3715 arg = ((arg >> 1) & 0x7FFFULL) | (cf << 15);
3716 cf = tempcf;
3717 tempCOUNT--;
3719 break;
3720 case 1:
3721 while (tempCOUNT >= 9) tempCOUNT -= 9;
3722 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3723 of = ((arg >> 7) ^ cf) & 1;
3724 while (tempCOUNT > 0) {
3725 tempcf = arg & 1;
3726 arg = ((arg >> 1) & 0x7FULL) | (cf << 7);
3727 cf = tempcf;
3728 tempCOUNT--;
3730 break;
3731 default:
3732 vpanic("calculate_RCR(amd64g): invalid size");
3735 cf &= 1;
3736 of &= 1;
3737 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
3738 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
3740 /* caller can ask to have back either the resulting flags or
3741 resulting value, but not both */
3742 return wantRflags ? rflags_in : arg;
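/* A minimal sketch of the szIN sign convention described above; the
   example_* wrapper below is purely illustrative and not part of the
   VEX API.  A negative szIN asks for the updated rflags, a positive
   one for the rotated value. */
#if 0
static void example_RCR_szIN_convention ( void )
{
   ULong val_in   = 0x8000000000000001ULL;
   ULong flags_in = AMD64G_CC_MASK_C;             /* CF = 1 on entry */
   /* szIN > 0: get the rotated value back */
   ULong rotated  = amd64g_calculate_RCR(val_in, 1, flags_in, 8);
   /* szIN < 0: get the updated rflags back instead */
   ULong flags    = amd64g_calculate_RCR(val_in, 1, flags_in, -8);
   vassert(rotated == 0xC000000000000000ULL);  /* old CF rotated into bit 63 */
   vassert((flags & AMD64G_CC_MASK_C) != 0);   /* bit 0 of val_in became CF  */
}
#endif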
3745 ULong amd64g_calculate_RCL ( ULong arg,
3746 ULong rot_amt,
3747 ULong rflags_in,
3748 Long szIN )
3750 Bool wantRflags = toBool(szIN < 0);
3751 ULong sz = wantRflags ? (-szIN) : szIN;
3752 ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F);
3753 ULong cf=0, of=0, tempcf;
3755 switch (sz) {
3756 case 8:
3757 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3758 while (tempCOUNT > 0) {
3759 tempcf = (arg >> 63) & 1;
3760 arg = (arg << 1) | (cf & 1);
3761 cf = tempcf;
3762 tempCOUNT--;
3764 of = ((arg >> 63) ^ cf) & 1;
3765 break;
3766 case 4:
3767 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3768 while (tempCOUNT > 0) {
3769 tempcf = (arg >> 31) & 1;
3770 arg = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1));
3771 cf = tempcf;
3772 tempCOUNT--;
3774 of = ((arg >> 31) ^ cf) & 1;
3775 break;
3776 case 2:
3777 while (tempCOUNT >= 17) tempCOUNT -= 17;
3778 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3779 while (tempCOUNT > 0) {
3780 tempcf = (arg >> 15) & 1;
3781 arg = 0xFFFFULL & ((arg << 1) | (cf & 1));
3782 cf = tempcf;
3783 tempCOUNT--;
3785 of = ((arg >> 15) ^ cf) & 1;
3786 break;
3787 case 1:
3788 while (tempCOUNT >= 9) tempCOUNT -= 9;
3789 cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
3790 while (tempCOUNT > 0) {
3791 tempcf = (arg >> 7) & 1;
3792 arg = 0xFFULL & ((arg << 1) | (cf & 1));
3793 cf = tempcf;
3794 tempCOUNT--;
3796 of = ((arg >> 7) ^ cf) & 1;
3797 break;
3798 default:
3799 vpanic("calculate_RCL(amd64g): invalid size");
3802 cf &= 1;
3803 of &= 1;
3804 rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
3805 rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
3807 return wantRflags ? rflags_in : arg;
3810 /* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+)
3811 * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25
3813 ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which)
3815 ULong hi, lo, tmp, A[16];
3817 A[0] = 0; A[1] = a;
3818 A[2] = A[1] << 1; A[3] = A[2] ^ a;
3819 A[4] = A[2] << 1; A[5] = A[4] ^ a;
3820 A[6] = A[3] << 1; A[7] = A[6] ^ a;
3821 A[8] = A[4] << 1; A[9] = A[8] ^ a;
3822 A[10] = A[5] << 1; A[11] = A[10] ^ a;
3823 A[12] = A[6] << 1; A[13] = A[12] ^ a;
3824 A[14] = A[7] << 1; A[15] = A[14] ^ a;
3826 lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15];
3827 hi = lo >> 56;
3828 lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15];
3829 hi = (hi << 8) | (lo >> 56);
3830 lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15];
3831 hi = (hi << 8) | (lo >> 56);
3832 lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15];
3833 hi = (hi << 8) | (lo >> 56);
3834 lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15];
3835 hi = (hi << 8) | (lo >> 56);
3836 lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15];
3837 hi = (hi << 8) | (lo >> 56);
3838 lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15];
3839 hi = (hi << 8) | (lo >> 56);
3840 lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15];
3842 ULong m0 = -1;
3843 m0 /= 255;
3844 tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp;
3845 tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp;
3846 tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp;
3847 tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp;
3848 tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp;
3849 tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp;
3850 tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp;
3852 return which ? hi : lo;
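/* A small worked example of the 'which' parameter (the example_* function
   is illustrative only and not part of VEX).  In GF(2)[x],
   (x+1)*(x+1) = x^2+1, i.e. 0b11 carry-less-times 0b11 = 0b101. */
#if 0
static void example_pclmul ( void )
{
   vassert(amd64g_calculate_pclmul(3, 3, /*which=*/0) == 5);  /* low 64 bits  */
   vassert(amd64g_calculate_pclmul(3, 3, /*which=*/1) == 0);  /* high 64 bits */
}
#endif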
3856 /* CALLED FROM GENERATED CODE */
3857 /* DIRTY HELPER (non-referentially-transparent) */
3858 /* Horrible hack. On non-amd64 platforms, return 1. */
3859 ULong amd64g_dirtyhelper_RDTSC ( void )
3861 # if defined(__x86_64__)
3862 UInt eax, edx;
3863 __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx));
3864 return (((ULong)edx) << 32) | ((ULong)eax);
3865 # else
3866 return 1ULL;
3867 # endif
3870 /* CALLED FROM GENERATED CODE */
3871 /* DIRTY HELPER (non-referentially-transparent) */
3872 /* Horrible hack. On non-amd64 platforms, do nothing. */

3873 /* This uses a different calling convention from _RDTSC just above
3874 only because of the difficulty of returning 96 bits from a C
3875 function -- RDTSC returns 64 bits and so, on amd64, is simple by
3876 comparison. */
3877 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st )
3879 # if defined(__x86_64__)
3880 UInt eax, ecx, edx;
3881 __asm__ __volatile__("rdtscp" : "=a" (eax), "=d" (edx), "=c" (ecx));
3882 st->guest_RAX = (ULong)eax;
3883 st->guest_RCX = (ULong)ecx;
3884 st->guest_RDX = (ULong)edx;
3885 # else
3886 /* Do nothing. */
3887 # endif
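/* Illustrative sketch only (the example_* function is not part of VEX):
   because the helper deposits EDX:EAX (and ECX) straight into the guest
   state instead of returning them, a caller reassembles the 64-bit TSC
   from guest_RDX:guest_RAX afterwards. */
#if 0
static ULong example_read_tsc_via_guest_state ( VexGuestAMD64State* st )
{
   amd64g_dirtyhelper_RDTSCP(st);
   return (st->guest_RDX << 32) | (st->guest_RAX & 0xFFFFFFFFULL);
}
#endif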
3890 /* CALLED FROM GENERATED CODE */
3891 /* DIRTY HELPER (non-referentially-transparent) */
3892 /* Horrible hack. On non-amd64 platforms, return 0. */
3893 ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ )
3895 # if defined(__x86_64__)
3896 ULong r = 0;
3897 portno &= 0xFFFF;
3898 switch (sz) {
3899 case 4:
3900 __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0"
3901 : "=a" (r) : "Nd" (portno));
3902 break;
3903 case 2:
3904 __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0"
3905 : "=a" (r) : "Nd" (portno));
3906 break;
3907 case 1:
3908 __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0"
3909 : "=a" (r) : "Nd" (portno));
3910 break;
3911 default:
3912 break; /* note: no 64-bit version of insn exists */
3914 return r;
3915 # else
3916 return 0;
3917 # endif
3921 /* CALLED FROM GENERATED CODE */
3922 /* DIRTY HELPER (non-referentially-transparent) */
3923 /* Horrible hack. On non-amd64 platforms, do nothing. */
3924 void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ )
3926 # if defined(__x86_64__)
3927 portno &= 0xFFFF;
3928 switch (sz) {
3929 case 4:
3930 __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1"
3931 : : "a" (data), "Nd" (portno));
3932 break;
3933 case 2:
3934 __asm__ __volatile__("outw %w0, %w1"
3935 : : "a" (data), "Nd" (portno));
3936 break;
3937 case 1:
3938 __asm__ __volatile__("outb %b0, %w1"
3939 : : "a" (data), "Nd" (portno));
3940 break;
3941 default:
3942 break; /* note: no 64-bit version of insn exists */
3944 # else
3945 /* do nothing */
3946 # endif
3949 /* CALLED FROM GENERATED CODE */
3950 /* DIRTY HELPER (non-referentially-transparent) */
3951 /* Horrible hack. On non-amd64 platforms, just zero out the result buffer. */
3952 /* op = 0: call the native SGDT instruction.
3953 op = 1: call the native SIDT instruction.
3955 void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) {
3956 # if defined(__x86_64__)
3957 switch (op) {
3958 case 0:
3959 __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
3960 break;
3961 case 1:
3962 __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
3963 break;
3964 default:
3965 vpanic("amd64g_dirtyhelper_SxDT");
3967 # else
3968 /* nothing sensible to do; hand back an all-zero descriptor image */
3969 UChar* p = (UChar*)address;
3970 p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
3971 p[6] = p[7] = p[8] = p[9] = 0;
3972 # endif
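/* Usage sketch (illustrative only; example_SxDT is not part of VEX): in
   64-bit mode SGDT/SIDT store a 10-byte image -- a 2-byte limit followed
   by an 8-byte base -- so the caller must supply at least 10 bytes. */
#if 0
static void example_SxDT ( void )
{
   UChar gdtr[10];
   UChar idtr[10];
   amd64g_dirtyhelper_SxDT(gdtr, 0);   /* op 0: SGDT */
   amd64g_dirtyhelper_SxDT(idtr, 1);   /* op 1: SIDT */
}
#endif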
3975 /* CALLED FROM GENERATED CODE */
3976 /* DIRTY HELPER (non-referentially-transparent) */
3977 /* Horrible hack. On non-amd64 platforms, return a dummy "success, all
3978    zeroes" value. On amd64 targets, get a 32 bit random number using RDRAND,
3979    and return it together with the associated rflags.C value. */
3980 ULong amd64g_dirtyhelper_RDRAND ( void ) {
3981 # if defined(__x86_64__)
3982 ULong res = 0;
3983 ULong cflag = 0;
3984 __asm__ __volatile__(
3985 "movq $0, %%r11 ; "
3986 "movq $0, %%r12 ; "
3987 "rdrand %%r11d ; "
3988 "setc %%r12b ; "
3989 "movq %%r11, %0 ; "
3990 "movq %%r12, %1"
3991 : "=r"(res), "=r"(cflag) : : "r11", "r12"
3993 res &= 0xFFFFFFFFULL;
3994 cflag &= 1ULL;
3995 return (cflag << 32) | res;
3996 # else
3997 /* There's nothing we can sensibly do. Return a value denoting
3998 "I succeeded, and the random bits are all zero" :-/ */
3999 return 1ULL << 32;
4000 # endif
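/* Sketch of how the packed result is unpicked (illustrative only; the
   example_* function is not part of VEX): bit 32 carries the rflags.C
   success flag and bits 31:0 carry the random value.
   amd64g_dirtyhelper_RDSEED below packs its result the same way. */
#if 0
static void example_unpack_RDRAND ( void )
{
   ULong packed = amd64g_dirtyhelper_RDRAND();
   ULong cf     = (packed >> 32) & 1;              /* success flag (C) */
   UInt  rnd    = (UInt)(packed & 0xFFFFFFFFULL);  /* the random bits  */
   (void)cf; (void)rnd;
}
#endif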
4003 ULong amd64g_dirtyhelper_RDSEED ( void ) {
4004 # if defined(__x86_64__)
4005 ULong res = 0;
4006 ULong cflag = 0;
4007 __asm__ __volatile__(
4008 "movq $0, %%r11 ; "
4009 "movq $0, %%r12 ; "
4010 "rdseed %%r11d ; "
4011 "setc %%r12b ; "
4012 "movq %%r11, %0 ; "
4013 "movq %%r12, %1"
4014 : "=r"(res), "=r"(cflag) : : "r11", "r12"
4016 res &= 0xFFFFFFFFULL;
4017 cflag &= 1ULL;
4018 return (cflag << 32) | res;
4019 # else
4020 /* There's nothing we can sensibly do. Return a value denoting
4021 "I succeeded, and the random bits are all zero" :-/ */
4022 return 1ULL << 32;
4023 # endif
4026 /*---------------------------------------------------------------*/
4027 /*--- Helpers for MMX/SSE/SSE2. ---*/
4028 /*---------------------------------------------------------------*/
4030 static inline UChar abdU8 ( UChar xx, UChar yy ) {
4031 return toUChar(xx>yy ? xx-yy : yy-xx);
4034 static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
4035 return (((ULong)w1) << 32) | ((ULong)w0);
4038 static inline UShort sel16x4_3 ( ULong w64 ) {
4039 UInt hi32 = toUInt(w64 >> 32);
4040 return toUShort(hi32 >> 16);
4042 static inline UShort sel16x4_2 ( ULong w64 ) {
4043 UInt hi32 = toUInt(w64 >> 32);
4044 return toUShort(hi32);
4046 static inline UShort sel16x4_1 ( ULong w64 ) {
4047 UInt lo32 = toUInt(w64);
4048 return toUShort(lo32 >> 16);
4050 static inline UShort sel16x4_0 ( ULong w64 ) {
4051 UInt lo32 = toUInt(w64);
4052 return toUShort(lo32);
4055 static inline UChar sel8x8_7 ( ULong w64 ) {
4056 UInt hi32 = toUInt(w64 >> 32);
4057 return toUChar(hi32 >> 24);
4059 static inline UChar sel8x8_6 ( ULong w64 ) {
4060 UInt hi32 = toUInt(w64 >> 32);
4061 return toUChar(hi32 >> 16);
4063 static inline UChar sel8x8_5 ( ULong w64 ) {
4064 UInt hi32 = toUInt(w64 >> 32);
4065 return toUChar(hi32 >> 8);
4067 static inline UChar sel8x8_4 ( ULong w64 ) {
4068 UInt hi32 = toUInt(w64 >> 32);
4069 return toUChar(hi32 >> 0);
4071 static inline UChar sel8x8_3 ( ULong w64 ) {
4072 UInt lo32 = toUInt(w64);
4073 return toUChar(lo32 >> 24);
4075 static inline UChar sel8x8_2 ( ULong w64 ) {
4076 UInt lo32 = toUInt(w64);
4077 return toUChar(lo32 >> 16);
4079 static inline UChar sel8x8_1 ( ULong w64 ) {
4080 UInt lo32 = toUInt(w64);
4081 return toUChar(lo32 >> 8);
4083 static inline UChar sel8x8_0 ( ULong w64 ) {
4084 UInt lo32 = toUInt(w64);
4085 return toUChar(lo32 >> 0);
4088 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4089 ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
4091 return
4092 mk32x2(
4093 (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
4094 + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
4095 (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
4096 + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
4100 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4101 ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
4103 UInt t = 0;
4104 t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
4105 t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
4106 t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
4107 t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
4108 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
4109 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
4110 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
4111 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
4112 t &= 0xFFFF;
4113 return (ULong)t;
4116 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4117 ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi )
4119 UShort t, min;
4120 UInt idx;
4121 t = sel16x4_0(sLo); if (True) { min = t; idx = 0; }
4122 t = sel16x4_1(sLo); if (t < min) { min = t; idx = 1; }
4123 t = sel16x4_2(sLo); if (t < min) { min = t; idx = 2; }
4124 t = sel16x4_3(sLo); if (t < min) { min = t; idx = 3; }
4125 t = sel16x4_0(sHi); if (t < min) { min = t; idx = 4; }
4126 t = sel16x4_1(sHi); if (t < min) { min = t; idx = 5; }
4127 t = sel16x4_2(sHi); if (t < min) { min = t; idx = 6; }
4128 t = sel16x4_3(sHi); if (t < min) { min = t; idx = 7; }
4129 return ((ULong)(idx << 16)) | ((ULong)min);
4132 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4133 ULong amd64g_calc_crc32b ( ULong crcIn, ULong b )
4135 UInt i;
4136 ULong crc = (b & 0xFFULL) ^ crcIn;
4137 for (i = 0; i < 8; i++)
4138 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
4139 return crc;
4142 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4143 ULong amd64g_calc_crc32w ( ULong crcIn, ULong w )
4145 UInt i;
4146 ULong crc = (w & 0xFFFFULL) ^ crcIn;
4147 for (i = 0; i < 16; i++)
4148 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
4149 return crc;
4152 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4153 ULong amd64g_calc_crc32l ( ULong crcIn, ULong l )
4155 UInt i;
4156 ULong crc = (l & 0xFFFFFFFFULL) ^ crcIn;
4157 for (i = 0; i < 32; i++)
4158 crc = (crc >> 1) ^ ((crc & 1) ? 0x82f63b78ULL : 0);
4159 return crc;
4162 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4163 ULong amd64g_calc_crc32q ( ULong crcIn, ULong q )
4165 ULong crc = amd64g_calc_crc32l(crcIn, q);
4166 return amd64g_calc_crc32l(crc, q >> 32);
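/* Usage sketch (illustrative only; example_crc32c_of_buffer is not part of
   VEX): like the CRC32 instruction itself, these helpers do no pre- or
   post-inversion, so a caller computing a conventional CRC-32C checksum
   typically starts from all-ones and inverts the final value. */
#if 0
static UInt example_crc32c_of_buffer ( const UChar* buf, ULong len )
{
   ULong crc = 0xFFFFFFFFULL;
   ULong i;
   for (i = 0; i < len; i++)
      crc = amd64g_calc_crc32b(crc, buf[i]);
   return (UInt)(crc ^ 0xFFFFFFFFULL);
}
#endif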
4170 /* .. helper for next fn .. */
4171 static inline ULong sad_8x4 ( ULong xx, ULong yy )
4173 UInt t = 0;
4174 t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
4175 t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
4176 t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
4177 t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
4178 return (ULong)t;
4181 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4182 ULong amd64g_calc_mpsadbw ( ULong sHi, ULong sLo,
4183 ULong dHi, ULong dLo,
4184 ULong imm_and_return_control_bit )
4186 UInt imm8 = imm_and_return_control_bit & 7;
4187 Bool calcHi = (imm_and_return_control_bit >> 7) & 1;
4188 UInt srcOffsL = imm8 & 3; /* src offs in 32-bit (L) chunks */
4189 UInt dstOffsL = (imm8 >> 2) & 1; /* dst offs in ditto chunks */
4190 /* For src we only need 32 bits, so get them into the
4191 lower half of a 64 bit word. */
4192 ULong src = ((srcOffsL & 2) ? sHi : sLo) >> (32 * (srcOffsL & 1));
4193 /* For dst we need to get hold of 56 bits (7 bytes) from a total of
4194 11 bytes. If calculating the low part of the result, need bytes
4195 dstOffsL * 4 + (0 .. 6); if calculating the high part,
4196 dstOffsL * 4 + (4 .. 10). */
4197 ULong dst;
4198 /* dstOffL = 0, Lo -> 0 .. 6
4199 dstOffL = 1, Lo -> 4 .. 10
4200 dstOffL = 0, Hi -> 4 .. 10
4201 dstOffL = 1, Hi -> 8 .. 14
4203 if (calcHi && dstOffsL) {
4204 /* 8 .. 14 */
4205 dst = dHi & 0x00FFFFFFFFFFFFFFULL;
4207 else if (!calcHi && !dstOffsL) {
4208 /* 0 .. 6 */
4209 dst = dLo & 0x00FFFFFFFFFFFFFFULL;
4211 else {
4212 /* 4 .. 10 */
4213 dst = (dLo >> 32) | ((dHi & 0x00FFFFFFULL) << 32);
4215 ULong r0 = sad_8x4( dst >> 0, src );
4216 ULong r1 = sad_8x4( dst >> 8, src );
4217 ULong r2 = sad_8x4( dst >> 16, src );
4218 ULong r3 = sad_8x4( dst >> 24, src );
4219 ULong res = (r3 << 48) | (r2 << 32) | (r1 << 16) | r0;
4220 return res;
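/* A worked instance of the window/offset selection described inside
   amd64g_calc_mpsadbw above (illustrative only; example_mpsadbw is not part
   of VEX).  With imm8 = 0 the source window is the low 32 bits of sLo and
   the destination window starts at byte 0 of dLo, so a matching first
   window gives r0 = 0 and the sums grow as the window slides. */
#if 0
static void example_mpsadbw ( void )
{
   ULong res = amd64g_calc_mpsadbw(0, 0x04030201ULL, 0, 0x04030201ULL, 0);
   /* r0 = 0, r1 = 7, r2 = 11, r3 = 12, packed as 16-bit lanes */
   vassert(res == 0x000C000B00070000ULL);
}
#endif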
4223 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4224 ULong amd64g_calculate_pext ( ULong src_masked, ULong mask )
4226 ULong dst = 0;
4227 ULong src_bit;
4228 ULong dst_bit = 1;
4229 for (src_bit = 1; src_bit; src_bit <<= 1) {
4230 if (mask & src_bit) {
4231 if (src_masked & src_bit) dst |= dst_bit;
4232 dst_bit <<= 1;
4235 return dst;
4238 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
4239 ULong amd64g_calculate_pdep ( ULong src, ULong mask )
4241 ULong dst = 0;
4242 ULong dst_bit;
4243 ULong src_bit = 1;
4244 for (dst_bit = 1; dst_bit; dst_bit <<= 1) {
4245 if (mask & dst_bit) {
4246 if (src & src_bit) dst |= dst_bit;
4247 src_bit <<= 1;
4250 return dst;
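/* Worked example of the two bit-manipulation helpers (illustrative only;
   example_pdep_pext is not part of VEX).  With mask = 0xCC (bits 2,3,6,7),
   PDEP scatters the low source bits into the mask positions and PEXT
   gathers them back; note amd64g_calculate_pext expects its first argument
   to have been ANDed with the mask already. */
#if 0
static void example_pdep_pext ( void )
{
   vassert(amd64g_calculate_pdep(0xB,  0xCC) == 0x8C);
   vassert(amd64g_calculate_pext(0x8C, 0xCC) == 0xB);
}
#endif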
4253 /*---------------------------------------------------------------*/
4254 /*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M} ---*/
4255 /*---------------------------------------------------------------*/
4257 static UInt zmask_from_V128 ( V128* arg )
4259 UInt i, res = 0;
4260 for (i = 0; i < 16; i++) {
4261 res |= ((arg->w8[i] == 0) ? 1 : 0) << i;
4263 return res;
4266 static UInt zmask_from_V128_wide ( V128* arg )
4268 UInt i, res = 0;
4269 for (i = 0; i < 8; i++) {
4270 res |= ((arg->w16[i] == 0) ? 1 : 0) << i;
4272 return res;
4275 /* Helps with PCMP{I,E}STR{I,M}.
4277 CALLED FROM GENERATED CODE: DIRTY HELPER(s). (But not really,
4278 actually it could be a clean helper, but for the fact that we can't
4279 pass by value 2 x V128 to a clean helper, nor have one returned.)
4280 Reads guest state, writes to guest state for the xSTRM cases, no
4281 accesses of memory, and is otherwise a pure function.
4283 opc_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so
4284 the callee knows which I/E and I/M variant it is dealing with and
4285 what the specific operation is. 4th byte of opcode is in the range
4286 0x60 to 0x63:
4287 istri 66 0F 3A 63
4288 istrm 66 0F 3A 62
4289 estri 66 0F 3A 61
4290 estrm 66 0F 3A 60
4292 gstOffL and gstOffR are the guest state offsets for the two XMM
4293 register inputs. We never have to deal with the memory case since
4294 that is handled by pre-loading the relevant value into the fake
4295 XMM16 register.
4297 For ESTRx variants, edxIN and eaxIN hold the values of those two
4298 registers.
4300 In all cases, the bottom 16 bits of the result contain the new
4301 OSZACP %rflags values. For xSTRI variants, bits[31:16] of the
4302 result hold the new %ecx value. For xSTRM variants, the helper
4303 writes the result directly to the guest XMM0.
4305 Declarable side effects: in all cases, reads guest state at
4306 [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes
4307 guest_XMM0.
4309 Is expected to be called with opc_and_imm combinations which have
4310 actually been validated, and will assert if otherwise. The front
4311 end should ensure we're only called with verified values.
4313 ULong amd64g_dirtyhelper_PCMPxSTRx (
4314 VexGuestAMD64State* gst,
4315 HWord opc4_and_imm,
4316 HWord gstOffL, HWord gstOffR,
4317 HWord edxIN, HWord eaxIN
4320 HWord opc4 = (opc4_and_imm >> 8) & 0xFF;
4321 HWord imm8 = opc4_and_imm & 0xFF;
4322 HWord isISTRx = opc4 & 2;
4323 HWord isxSTRM = (opc4 & 1) ^ 1;
4324 vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */
4325 HWord wide = (imm8 & 1);
4327 // where the args are
4328 V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
4329 V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
4331 /* Create the arg validity masks, either from the vectors
4332 themselves or from the supplied edx/eax values. */
4333 // FIXME: this is only right for the 8-bit data cases.
4334 // At least that is asserted above.
4335 UInt zmaskL, zmaskR;
4337 // temp spot for the resulting flags and vector.
4338 V128 resV;
4339 UInt resOSZACP;
4341 // for checking whether case was handled
4342 Bool ok = False;
4344 if (wide) {
4345 if (isISTRx) {
4346 zmaskL = zmask_from_V128_wide(argL);
4347 zmaskR = zmask_from_V128_wide(argR);
4348 } else {
4349 Int tmp;
4350 tmp = edxIN & 0xFFFFFFFF;
4351 if (tmp < -8) tmp = -8;
4352 if (tmp > 8) tmp = 8;
4353 if (tmp < 0) tmp = -tmp;
4354 vassert(tmp >= 0 && tmp <= 8);
4355 zmaskL = (1 << tmp) & 0xFF;
4356 tmp = eaxIN & 0xFFFFFFFF;
4357 if (tmp < -8) tmp = -8;
4358 if (tmp > 8) tmp = 8;
4359 if (tmp < 0) tmp = -tmp;
4360 vassert(tmp >= 0 && tmp <= 8);
4361 zmaskR = (1 << tmp) & 0xFF;
4363 // do the math
4364 ok = compute_PCMPxSTRx_wide (
4365 &resV, &resOSZACP, argL, argR,
4366 zmaskL, zmaskR, imm8, (Bool)isxSTRM
4368 } else {
4369 if (isISTRx) {
4370 zmaskL = zmask_from_V128(argL);
4371 zmaskR = zmask_from_V128(argR);
4372 } else {
4373 Int tmp;
4374 tmp = edxIN & 0xFFFFFFFF;
4375 if (tmp < -16) tmp = -16;
4376 if (tmp > 16) tmp = 16;
4377 if (tmp < 0) tmp = -tmp;
4378 vassert(tmp >= 0 && tmp <= 16);
4379 zmaskL = (1 << tmp) & 0xFFFF;
4380 tmp = eaxIN & 0xFFFFFFFF;
4381 if (tmp < -16) tmp = -16;
4382 if (tmp > 16) tmp = 16;
4383 if (tmp < 0) tmp = -tmp;
4384 vassert(tmp >= 0 && tmp <= 16);
4385 zmaskR = (1 << tmp) & 0xFFFF;
4387 // do the math
4388 ok = compute_PCMPxSTRx (
4389 &resV, &resOSZACP, argL, argR,
4390 zmaskL, zmaskR, imm8, (Bool)isxSTRM
4394 // front end shouldn't pass us any imm8 variants we can't
4395 // handle. Hence:
4396 vassert(ok);
4398 // So, finally we need to get the results back to the caller.
4399 // In all cases, the new OSZACP value is the lowest 16 of
4400 // the return value.
4401 if (isxSTRM) {
4402 gst->guest_YMM0[0] = resV.w32[0];
4403 gst->guest_YMM0[1] = resV.w32[1];
4404 gst->guest_YMM0[2] = resV.w32[2];
4405 gst->guest_YMM0[3] = resV.w32[3];
4406 return resOSZACP & 0x8D5;
4407 } else {
4408 UInt newECX = resV.w32[0] & 0xFFFF;
4409 return (newECX << 16) | (resOSZACP & 0x8D5);
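/* Sketch of how a caller unpicks the return value (illustrative only; the
   example_* function is not part of VEX): bits 15:0 hold the new OSZACP
   flags and, for the xSTRI variants, bits 31:16 hold the new ECX; for the
   xSTRM variants the vector result has already been written to guest_YMM0
   by the helper. */
#if 0
static void example_unpack_PCMPxSTRx ( ULong res )
{
   UInt newOSZACP = (UInt)(res & 0x8D5);            /* O,S,Z,A,C,P      */
   UInt newECX    = (UInt)((res >> 16) & 0xFFFF);   /* xSTRI cases only */
   (void)newOSZACP; (void)newECX;
}
#endif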
4413 /*---------------------------------------------------------------*/
4414 /*--- AES primitives and helpers ---*/
4415 /*---------------------------------------------------------------*/
4416 /* a 16 x 16 matrix */
4417 static const UChar sbox[256] = { // row nr
4418 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, // 1
4419 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
4420 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, // 2
4421 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
4422 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, // 3
4423 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
4424 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, // 4
4425 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
4426 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, // 5
4427 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
4428 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, // 6
4429 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
4430 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, // 7
4431 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
4432 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, // 8
4433 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
4434 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, // 9
4435 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
4436 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, //10
4437 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
4438 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, //11
4439 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
4440 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, //12
4441 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
4442 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, //13
4443 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
4444 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, //14
4445 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
4446 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, //15
4447 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
4448 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, //16
4449 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
4451 static void SubBytes (V128* v)
4453 V128 r;
4454 UInt i;
4455 for (i = 0; i < 16; i++)
4456 r.w8[i] = sbox[v->w8[i]];
4457 *v = r;
4460 /* a 16 x 16 matrix */
4461 static const UChar invsbox[256] = { // row nr
4462 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, // 1
4463 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
4464 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, // 2
4465 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
4466 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, // 3
4467 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
4468 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, // 4
4469 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
4470 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, // 5
4471 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
4472 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, // 6
4473 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
4474 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, // 7
4475 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
4476 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, // 8
4477 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
4478 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, // 9
4479 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
4480 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, //10
4481 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
4482 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, //11
4483 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
4484 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, //12
4485 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
4486 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, //13
4487 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
4488 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, //14
4489 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
4490 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, //15
4491 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
4492 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, //16
4493 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
4495 static void InvSubBytes (V128* v)
4497 V128 r;
4498 UInt i;
4499 for (i = 0; i < 16; i++)
4500 r.w8[i] = invsbox[v->w8[i]];
4501 *v = r;
4504 static const UChar ShiftRows_op[16] =
4505 {11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0};
4506 static void ShiftRows (V128* v)
4508 V128 r;
4509 UInt i;
4510 for (i = 0; i < 16; i++)
4511 r.w8[i] = v->w8[ShiftRows_op[15-i]];
4512 *v = r;
4515 static const UChar InvShiftRows_op[16] =
4516 {3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0};
4517 static void InvShiftRows (V128* v)
4519 V128 r;
4520 UInt i;
4521 for (i = 0; i < 16; i++)
4522 r.w8[i] = v->w8[InvShiftRows_op[15-i]];
4523 *v = r;
4526 /* Multiplication of the finite fields elements of AES.
4527 See "A Specification for The AES Algorithm Rijndael
4528 (by Joan Daemen & Vincent Rijmen)"
4529 Dr. Brian Gladman, v3.1, 3rd March 2001. */
4530 /* N values so that (hex) xy = 0x03^N.
4531 0x00 cannot be used. We put 0xff for this value.*/
4532 /* a 16 x 16 matrix */
4533 static const UChar Nxy[256] = { // row nr
4534 0xff, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, // 1
4535 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
4536 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, // 2
4537 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
4538 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, // 3
4539 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
4540 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, // 4
4541 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
4542 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, // 5
4543 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
4544 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, // 6
4545 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
4546 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, // 7
4547 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
4548 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, // 8
4549 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
4550 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, // 9
4551 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
4552 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, //10
4553 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
4554 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, //11
4555 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
4556 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, //12
4557 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
4558 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, //13
4559 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
4560 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, //14
4561 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
4562 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, //15
4563 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
4564 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, //16
4565 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07
4568 /* E values so that E = 0x03^xy. */
4569 static const UChar Exy[256] = { // row nr
4570 0x01, 0x03, 0x05, 0x0f, 0x11, 0x33, 0x55, 0xff, // 1
4571 0x1a, 0x2e, 0x72, 0x96, 0xa1, 0xf8, 0x13, 0x35,
4572 0x5f, 0xe1, 0x38, 0x48, 0xd8, 0x73, 0x95, 0xa4, // 2
4573 0xf7, 0x02, 0x06, 0x0a, 0x1e, 0x22, 0x66, 0xaa,
4574 0xe5, 0x34, 0x5c, 0xe4, 0x37, 0x59, 0xeb, 0x26, // 3
4575 0x6a, 0xbe, 0xd9, 0x70, 0x90, 0xab, 0xe6, 0x31,
4576 0x53, 0xf5, 0x04, 0x0c, 0x14, 0x3c, 0x44, 0xcc, // 4
4577 0x4f, 0xd1, 0x68, 0xb8, 0xd3, 0x6e, 0xb2, 0xcd,
4578 0x4c, 0xd4, 0x67, 0xa9, 0xe0, 0x3b, 0x4d, 0xd7, // 5
4579 0x62, 0xa6, 0xf1, 0x08, 0x18, 0x28, 0x78, 0x88,
4580 0x83, 0x9e, 0xb9, 0xd0, 0x6b, 0xbd, 0xdc, 0x7f, // 6
4581 0x81, 0x98, 0xb3, 0xce, 0x49, 0xdb, 0x76, 0x9a,
4582 0xb5, 0xc4, 0x57, 0xf9, 0x10, 0x30, 0x50, 0xf0, // 7
4583 0x0b, 0x1d, 0x27, 0x69, 0xbb, 0xd6, 0x61, 0xa3,
4584 0xfe, 0x19, 0x2b, 0x7d, 0x87, 0x92, 0xad, 0xec, // 8
4585 0x2f, 0x71, 0x93, 0xae, 0xe9, 0x20, 0x60, 0xa0,
4586 0xfb, 0x16, 0x3a, 0x4e, 0xd2, 0x6d, 0xb7, 0xc2, // 9
4587 0x5d, 0xe7, 0x32, 0x56, 0xfa, 0x15, 0x3f, 0x41,
4588 0xc3, 0x5e, 0xe2, 0x3d, 0x47, 0xc9, 0x40, 0xc0, //10
4589 0x5b, 0xed, 0x2c, 0x74, 0x9c, 0xbf, 0xda, 0x75,
4590 0x9f, 0xba, 0xd5, 0x64, 0xac, 0xef, 0x2a, 0x7e, //11
4591 0x82, 0x9d, 0xbc, 0xdf, 0x7a, 0x8e, 0x89, 0x80,
4592 0x9b, 0xb6, 0xc1, 0x58, 0xe8, 0x23, 0x65, 0xaf, //12
4593 0xea, 0x25, 0x6f, 0xb1, 0xc8, 0x43, 0xc5, 0x54,
4594 0xfc, 0x1f, 0x21, 0x63, 0xa5, 0xf4, 0x07, 0x09, //13
4595 0x1b, 0x2d, 0x77, 0x99, 0xb0, 0xcb, 0x46, 0xca,
4596 0x45, 0xcf, 0x4a, 0xde, 0x79, 0x8b, 0x86, 0x91, //14
4597 0xa8, 0xe3, 0x3e, 0x42, 0xc6, 0x51, 0xf3, 0x0e,
4598 0x12, 0x36, 0x5a, 0xee, 0x29, 0x7b, 0x8d, 0x8c, //15
4599 0x8f, 0x8a, 0x85, 0x94, 0xa7, 0xf2, 0x0d, 0x17,
4600 0x39, 0x4b, 0xdd, 0x7c, 0x84, 0x97, 0xa2, 0xfd, //16
4601 0x1c, 0x24, 0x6c, 0xb4, 0xc7, 0x52, 0xf6, 0x01};
4603 static inline UChar ff_mul(UChar u1, UChar u2)
4605 if ((u1 > 0) && (u2 > 0)) {
4606 UInt ui = Nxy[u1] + Nxy[u2];
4607 if (ui >= 255)
4608 ui = ui - 255;
4609 return Exy[ui];
4610 } else {
4611 return 0;
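/* A quick sanity example (illustrative only; example_ff_mul is not part of
   VEX): multiplying by {01} is the identity, and {57}.{83} = {c1} is the
   worked example given in FIPS-197, section 4.2.  Both follow from the
   log/antilog tables above:
   Exy[Nxy[0x57] + Nxy[0x83]] = Exy[0x62 + 0x50] = Exy[0xb2] = 0xc1. */
#if 0
static void example_ff_mul ( void )
{
   vassert(ff_mul(0x57, 0x01) == 0x57);
   vassert(ff_mul(0x57, 0x83) == 0xc1);
}
#endif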
4615 static void MixColumns (V128* v)
4617 V128 r;
4618 Int j;
4619 #define P(x,row,col) (x)->w8[((row)*4+(col))]
4620 for (j = 0; j < 4; j++) {
4621 P(&r,j,0) = ff_mul(0x02, P(v,j,0)) ^ ff_mul(0x03, P(v,j,1))
4622 ^ P(v,j,2) ^ P(v,j,3);
4623 P(&r,j,1) = P(v,j,0) ^ ff_mul( 0x02, P(v,j,1) )
4624 ^ ff_mul(0x03, P(v,j,2) ) ^ P(v,j,3);
4625 P(&r,j,2) = P(v,j,0) ^ P(v,j,1) ^ ff_mul( 0x02, P(v,j,2) )
4626 ^ ff_mul(0x03, P(v,j,3) );
4627 P(&r,j,3) = ff_mul(0x03, P(v,j,0) ) ^ P(v,j,1) ^ P(v,j,2)
4628 ^ ff_mul( 0x02, P(v,j,3) );
4630 *v = r;
4631 #undef P
4634 static void InvMixColumns (V128* v)
4636 V128 r;
4637 Int j;
4638 #define P(x,row,col) (x)->w8[((row)*4+(col))]
4639 for (j = 0; j < 4; j++) {
4640 P(&r,j,0) = ff_mul(0x0e, P(v,j,0) ) ^ ff_mul(0x0b, P(v,j,1) )
4641 ^ ff_mul(0x0d,P(v,j,2) ) ^ ff_mul(0x09, P(v,j,3) );
4642 P(&r,j,1) = ff_mul(0x09, P(v,j,0) ) ^ ff_mul(0x0e, P(v,j,1) )
4643 ^ ff_mul(0x0b,P(v,j,2) ) ^ ff_mul(0x0d, P(v,j,3) );
4644 P(&r,j,2) = ff_mul(0x0d, P(v,j,0) ) ^ ff_mul(0x09, P(v,j,1) )
4645 ^ ff_mul(0x0e,P(v,j,2) ) ^ ff_mul(0x0b, P(v,j,3) );
4646 P(&r,j,3) = ff_mul(0x0b, P(v,j,0) ) ^ ff_mul(0x0d, P(v,j,1) )
4647 ^ ff_mul(0x09,P(v,j,2) ) ^ ff_mul(0x0e, P(v,j,3) );
4649 *v = r;
4650 #undef P
4654 /* For description, see definition in guest_amd64_defs.h */
4655 void amd64g_dirtyhelper_AES (
4656 VexGuestAMD64State* gst,
4657 HWord opc4, HWord gstOffD,
4658 HWord gstOffL, HWord gstOffR
4661 // where the args are
4662 V128* argD = (V128*)( ((UChar*)gst) + gstOffD );
4663 V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
4664 V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
4665 V128 r;
4667 switch (opc4) {
4668 case 0xDC: /* AESENC */
4669 case 0xDD: /* AESENCLAST */
4670 r = *argR;
4671 ShiftRows (&r);
4672 SubBytes (&r);
4673 if (opc4 == 0xDC)
4674 MixColumns (&r);
4675 argD->w64[0] = r.w64[0] ^ argL->w64[0];
4676 argD->w64[1] = r.w64[1] ^ argL->w64[1];
4677 break;
4679 case 0xDE: /* AESDEC */
4680 case 0xDF: /* AESDECLAST */
4681 r = *argR;
4682 InvShiftRows (&r);
4683 InvSubBytes (&r);
4684 if (opc4 == 0xDE)
4685 InvMixColumns (&r);
4686 argD->w64[0] = r.w64[0] ^ argL->w64[0];
4687 argD->w64[1] = r.w64[1] ^ argL->w64[1];
4688 break;
4690 case 0xDB: /* AESIMC */
4691 *argD = *argL;
4692 InvMixColumns (argD);
4693 break;
4694 default: vassert(0);
4698 static inline UInt RotWord (UInt w32)
4700 return ((w32 >> 8) | (w32 << 24));
4703 static inline UInt SubWord (UInt w32)
4705 UChar *w8;
4706 UChar *r8;
4707 UInt res;
4708 w8 = (UChar*) &w32;
4709 r8 = (UChar*) &res;
4710 r8[0] = sbox[w8[0]];
4711 r8[1] = sbox[w8[1]];
4712 r8[2] = sbox[w8[2]];
4713 r8[3] = sbox[w8[3]];
4714 return res;
4717 /* For description, see definition in guest_amd64_defs.h */
4718 extern void amd64g_dirtyhelper_AESKEYGENASSIST (
4719 VexGuestAMD64State* gst,
4720 HWord imm8,
4721 HWord gstOffL, HWord gstOffR
4724 // where the args are
4725 V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
4726 V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
4728 // We have to create the result in a temporary in the
4729 // case where the src and dst regs are the same. See #341698.
4730 V128 tmp;
4732 tmp.w32[3] = RotWord (SubWord (argL->w32[3])) ^ imm8;
4733 tmp.w32[2] = SubWord (argL->w32[3]);
4734 tmp.w32[1] = RotWord (SubWord (argL->w32[1])) ^ imm8;
4735 tmp.w32[0] = SubWord (argL->w32[1]);
4737 argR->w32[3] = tmp.w32[3];
4738 argR->w32[2] = tmp.w32[2];
4739 argR->w32[1] = tmp.w32[1];
4740 argR->w32[0] = tmp.w32[0];
4745 /*---------------------------------------------------------------*/
4746 /*--- Helpers for dealing with, and describing, ---*/
4747 /*--- guest state as a whole. ---*/
4748 /*---------------------------------------------------------------*/
4750 /* Initialise the entire amd64 guest state. */
4751 /* VISIBLE TO LIBVEX CLIENT */
4752 void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state )
4754 vex_state->host_EvC_FAILADDR = 0;
4755 vex_state->host_EvC_COUNTER = 0;
4756 vex_state->pad0 = 0;
4758 vex_state->guest_RAX = 0;
4759 vex_state->guest_RCX = 0;
4760 vex_state->guest_RDX = 0;
4761 vex_state->guest_RBX = 0;
4762 vex_state->guest_RSP = 0;
4763 vex_state->guest_RBP = 0;
4764 vex_state->guest_RSI = 0;
4765 vex_state->guest_RDI = 0;
4766 vex_state->guest_R8 = 0;
4767 vex_state->guest_R9 = 0;
4768 vex_state->guest_R10 = 0;
4769 vex_state->guest_R11 = 0;
4770 vex_state->guest_R12 = 0;
4771 vex_state->guest_R13 = 0;
4772 vex_state->guest_R14 = 0;
4773 vex_state->guest_R15 = 0;
4775 vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
4776 vex_state->guest_CC_DEP1 = 0;
4777 vex_state->guest_CC_DEP2 = 0;
4778 vex_state->guest_CC_NDEP = 0;
4780 vex_state->guest_DFLAG = 1; /* forwards */
4781 vex_state->guest_IDFLAG = 0;
4782 vex_state->guest_ACFLAG = 0;
4784 /* HACK: represent the offset associated with a constant %fs.
4785 Typically, on linux, this assumes that %fs is only ever zero (main
4786 thread) or 0x63. */
4787 vex_state->guest_FS_CONST = 0;
4789 vex_state->guest_RIP = 0;
4791 /* Initialise the simulated FPU */
4792 amd64g_dirtyhelper_FINIT( vex_state );
4794 /* Initialise the AVX state. */
4795 # define AVXZERO(_ymm) \
4796 do { _ymm[0]=_ymm[1]=_ymm[2]=_ymm[3] = 0; \
4797 _ymm[4]=_ymm[5]=_ymm[6]=_ymm[7] = 0; \
4798 } while (0)
4799 vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST;
4800 AVXZERO(vex_state->guest_YMM0);
4801 AVXZERO(vex_state->guest_YMM1);
4802 AVXZERO(vex_state->guest_YMM2);
4803 AVXZERO(vex_state->guest_YMM3);
4804 AVXZERO(vex_state->guest_YMM4);
4805 AVXZERO(vex_state->guest_YMM5);
4806 AVXZERO(vex_state->guest_YMM6);
4807 AVXZERO(vex_state->guest_YMM7);
4808 AVXZERO(vex_state->guest_YMM8);
4809 AVXZERO(vex_state->guest_YMM9);
4810 AVXZERO(vex_state->guest_YMM10);
4811 AVXZERO(vex_state->guest_YMM11);
4812 AVXZERO(vex_state->guest_YMM12);
4813 AVXZERO(vex_state->guest_YMM13);
4814 AVXZERO(vex_state->guest_YMM14);
4815 AVXZERO(vex_state->guest_YMM15);
4816 AVXZERO(vex_state->guest_YMM16);
4818 # undef AVXZERO
4820 vex_state->guest_EMNOTE = EmNote_NONE;
4822 vex_state->guest_SETC = 0;
4824 /* These should never be read or written, but we
4825 initialise them anyway. */
4826 vex_state->guest_CMSTART = 0;
4827 vex_state->guest_CMLEN = 0;
4829 vex_state->guest_NRADDR = 0;
4830 vex_state->guest_SC_CLASS = 0;
4831 vex_state->guest_GS_CONST = 0;
4833 vex_state->guest_IP_AT_SYSCALL = 0;
4834 vex_state->pad1 = 0;
4838 /* Figure out if any part of the guest state contained in minoff
4839 .. maxoff requires precise memory exceptions. If in doubt return
4840 True (but this generates significantly slower code).
4842 By default we enforce precise exns for guest %RSP, %RBP and %RIP
4843 only. These are the minimum needed to extract correct stack
4844 backtraces from amd64 code.
4846 Only %RSP is needed in mode VexRegUpdSpAtMemAccess.
4848 Bool guest_amd64_state_requires_precise_mem_exns (
4849 Int minoff, Int maxoff, VexRegisterUpdates pxControl
4852 Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP);
4853 Int rbp_max = rbp_min + 8 - 1;
4854 Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP);
4855 Int rsp_max = rsp_min + 8 - 1;
4856 Int rip_min = offsetof(VexGuestAMD64State, guest_RIP);
4857 Int rip_max = rip_min + 8 - 1;
4859 if (maxoff < rsp_min || minoff > rsp_max) {
4860 /* no overlap with rsp */
4861 if (pxControl == VexRegUpdSpAtMemAccess)
4862 return False; // We only need to check stack pointer.
4863 } else {
4864 return True;
4867 if (maxoff < rbp_min || minoff > rbp_max) {
4868 /* no overlap with rbp */
4869 } else {
4870 return True;
4873 if (maxoff < rip_min || minoff > rip_max) {
4874 /* no overlap with rip */
4875 } else {
4876 return True;
4879 return False;
4883 #define ALWAYSDEFD(field) \
4884 { offsetof(VexGuestAMD64State, field), \
4885 (sizeof ((VexGuestAMD64State*)0)->field) }
4887 VexGuestLayout
4888 amd64guest_layout
4890 /* Total size of the guest state, in bytes. */
4891 .total_sizeB = sizeof(VexGuestAMD64State),
4893 /* Describe the stack pointer. */
4894 .offset_SP = offsetof(VexGuestAMD64State,guest_RSP),
4895 .sizeof_SP = 8,
4897 /* Describe the frame pointer. */
4898 .offset_FP = offsetof(VexGuestAMD64State,guest_RBP),
4899 .sizeof_FP = 8,
4901 /* Describe the instruction pointer. */
4902 .offset_IP = offsetof(VexGuestAMD64State,guest_RIP),
4903 .sizeof_IP = 8,
4905 /* Describe any sections to be regarded by Memcheck as
4906 'always-defined'. */
4907 .n_alwaysDefd = 16,
4909 /* flags thunk: OP and NDEP are always defd, whereas DEP1
4910 and DEP2 have to be tracked. See detailed comment in
4911 gdefs.h on meaning of thunk fields. */
4912 .alwaysDefd
4913 = { /* 0 */ ALWAYSDEFD(guest_CC_OP),
4914 /* 1 */ ALWAYSDEFD(guest_CC_NDEP),
4915 /* 2 */ ALWAYSDEFD(guest_DFLAG),
4916 /* 3 */ ALWAYSDEFD(guest_IDFLAG),
4917 /* 4 */ ALWAYSDEFD(guest_RIP),
4918 /* 5 */ ALWAYSDEFD(guest_FS_CONST),
4919 /* 6 */ ALWAYSDEFD(guest_FTOP),
4920 /* 7 */ ALWAYSDEFD(guest_FPTAG),
4921 /* 8 */ ALWAYSDEFD(guest_FPROUND),
4922 /* 9 */ ALWAYSDEFD(guest_FC3210),
4923 // /* */ ALWAYSDEFD(guest_CS),
4924 // /* */ ALWAYSDEFD(guest_DS),
4925 // /* */ ALWAYSDEFD(guest_ES),
4926 // /* */ ALWAYSDEFD(guest_FS),
4927 // /* */ ALWAYSDEFD(guest_GS),
4928 // /* */ ALWAYSDEFD(guest_SS),
4929 // /* */ ALWAYSDEFD(guest_LDT),
4930 // /* */ ALWAYSDEFD(guest_GDT),
4931 /* 10 */ ALWAYSDEFD(guest_EMNOTE),
4932 /* 11 */ ALWAYSDEFD(guest_SSEROUND),
4933 /* 12 */ ALWAYSDEFD(guest_CMSTART),
4934 /* 13 */ ALWAYSDEFD(guest_CMLEN),
4935 /* 14 */ ALWAYSDEFD(guest_SC_CLASS),
4936 /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
4941 /*---------------------------------------------------------------*/
4942 /*--- end guest_amd64_helpers.c ---*/
4943 /*---------------------------------------------------------------*/