/*---------------------------------------------------------------*/
/*--- begin                               guest_x86_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_emnote.h"
#include "libvex_guest_x86.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_x86_defs.h"
#include "guest_generic_x87.h"

/* This file contains helper functions for x86 guest code.
   Calls to these functions are generated by the back end.
   These calls are of course in the host machine code and
   this file will be compiled to host machine code, so that
   all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change the signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   guest-x86/toIR.c.

   The convention used is that all functions called from generated
   code are named x86g_<something>, and any function whose name lacks
   that prefix is not called from generated code.  Note that some
   LibVEX_* functions can however be called by VEX's client, but that
   is not the same as calling them from VEX-generated code.
*/

/* Set to 1 to get detailed profiling info about use of the flag
   machinery. */
#define PROFILE_EFLAGS 0

/*---------------------------------------------------------------*/
/*--- %eflags run-time helpers.                               ---*/
/*---------------------------------------------------------------*/

static const UChar parity_table[256] = {
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
    0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
};
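
/* Aside: each table entry is just the x86 PF rule applied to its
   index -- PF is set iff the low 8 bits of a result contain an even
   number of 1 bits.  An illustrative sketch (not part of the helpers
   themselves) of how the table could be regenerated: */
#if 0   /* illustrative only, never compiled */
static UChar parity_of ( UInt idx )
{
   UInt i, ones = 0;
   for (i = 0; i < 8; i++)
      ones += (idx >> i) & 1;
   /* even number of set bits --> PF set */
   return (ones & 1) == 0 ? X86G_CC_MASK_P : 0;
}
/* Expected: parity_of(n) == parity_table[n] for every n in 0..255. */
#endif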

/* generalised left-shifter */
inline static Int lshift ( Int x, Int n )
{
   if (n >= 0)
      return (UInt)x << n;
   else
      return x >> (-n);
}

/* identity on ULong */
static inline ULong idULong ( ULong x )
{
   return x;
}

#define PREAMBLE(__data_bits)                                   \
   /* const */ UInt DATA_MASK                                   \
      = __data_bits==8 ? 0xFF                                   \
                       : (__data_bits==16 ? 0xFFFF              \
                                          : 0xFFFFFFFF);        \
   /* const */ UInt SIGN_MASK = 1u << (__data_bits - 1);        \
   /* const */ UInt CC_DEP1 = cc_dep1_formal;                   \
   /* const */ UInt CC_DEP2 = cc_dep2_formal;                   \
   /* const */ UInt CC_NDEP = cc_ndep_formal;                   \
   /* Four bogus assignments, which hopefully gcc can */        \
   /* optimise away, and which stop it complaining about */     \
   /* unused variables. */                                      \
   SIGN_MASK = SIGN_MASK;                                       \
   DATA_MASK = DATA_MASK;                                       \
   CC_DEP2 = CC_DEP2;                                           \
   CC_NDEP = CC_NDEP;
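
/* Aside: these helpers implement VEX's lazy-eflags scheme.  Rather
   than computing EFLAGS after every instruction, the front end stores
   a thunk (CC_OP, CC_DEP1, CC_DEP2, CC_NDEP) describing the last
   flag-setting operation, and the ACTIONS_* bodies below recompute
   the flags from it only when they are actually needed.  For example,
   after `addl %ebx,%eax' the thunk would plausibly hold
   CC_OP = X86G_CC_OP_ADDL, CC_DEP1 = the old %eax, CC_DEP2 = %ebx. */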

/*-------------------------------------------------------------*/

#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL + argR;                                        \
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                   \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
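
/* Worked example (illustrative only): for an 8-bit add of 0xFF + 0x01
   the thunk holds CC_OP=X86G_CC_OP_ADDB, CC_DEP1=0xFF, CC_DEP2=0x01.
   Then res = 0x100, so (UChar)res == 0 < (UChar)argL, giving CF=1; the
   low byte of the result is zero, giving ZF and PF; bit 4 of
   res^argL^argR is set, giving AF; SF and OF stay clear. */
#if 0   /* illustrative only */
UInt f = x86g_calculate_eflags_all(X86G_CC_OP_ADDB, 0xFF, 0x01, 0);
/* expected: f == X86G_CC_MASK_C | X86G_CC_MASK_P
                  | X86G_CC_MASK_A | X86G_CC_MASK_Z */
#endif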

/*-------------------------------------------------------------*/

#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL - argR;                                        \
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;                  \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, oldC, res;                                \
     oldC = CC_NDEP & X86G_CC_MASK_C;                           \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL + argR) + oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;               \
     else                                                       \
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
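
/* Note (illustrative): as the `argR = CC_DEP2 ^ oldC' above implies,
   the thunk stores the second ADC operand xor'd with the old carry, so
   the helper re-xors with oldC to recover the real argR before redoing
   the addition.  The carry-out test also has to differ: with a
   carry-in of 1, res == argL is possible on wraparound, hence `<='
   rather than `<'. */
#if 0   /* illustrative only: 8-bit ADC of 0xFF + 0x00 with carry-in 1 */
UInt dep2 = 0x00 ^ 1;  /* operand stored xor'd with the old carry */
UInt f    = x86g_calculate_eflags_all(X86G_CC_OP_ADCB, 0xFF, dep2, 1);
/* res = 0xFF + 0x00 + 1 = 0x100: expect CF, ZF, AF and PF to be set. */
#endif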

/*-------------------------------------------------------------*/

#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, oldC, res;                                \
     oldC = CC_NDEP & X86G_CC_MASK_C;                           \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL - argR) - oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;              \
     else                                                       \
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;               \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = 0;                                                    \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res - 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & X86G_CC_MASK_C;                             \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;               \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
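
/* Note (illustrative): INC leaves CF unchanged, which is why the old
   carry is threaded through as CC_NDEP above.  The OF rule is simpler
   than for a general ADD: an increment overflows exactly when the
   result equals the smallest negative value, e.g. for 8 bits,
   0x7F + 1 --> 0x80. */
#if 0   /* illustrative only */
UInt f = x86g_calculate_eflags_all(X86G_CC_OP_INCB, 0x80, 0, 0);
/* (res & 0xFF) == 0x80 == SIGN_MASK, so OF is set; SF as well. */
#endif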

/*-------------------------------------------------------------*/

#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res + 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & X86G_CC_MASK_C;                             \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK)                                    \
           == ((UInt)SIGN_MASK - 1)) << 11;                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C;        \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = CC_DEP2 & 1;                                          \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt fl                                                    \
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))        \
          | (X86G_CC_MASK_C & CC_DEP1)                          \
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,                  \
                                      11-(DATA_BITS-1))         \
                               ^ lshift(CC_DEP1, 11)));         \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt fl                                                    \
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))        \
          | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))       \
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,                  \
                                      11-(DATA_BITS-1))         \
                               ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \
     return fl;                                                 \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

/*-------------------------------------------------------------*/

#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}

#if PROFILE_EFLAGS

static Bool initted = False;

/* C flag, fast route */
static UInt tabc_fast[X86G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[X86G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[X86G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))

static void showCounts ( void )
{
   Int op, co;
   HChar ch;
   vex_printf("\nTotal calls: calc_all=%u  calc_cond=%u  calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c);

   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
              "    S   NS    P   NP    L   NL   LE  NLE\n");
   vex_printf("     -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {

      ch = ' ';
      if (op > 0 && (op-1) % 3 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 3 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 3 == 2)
         ch = 'L';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000) {
            vex_printf(" %3dK", n / 1000);
         } else
         if (n >= 0) {
            vex_printf(" %3d ", n );
         } else {
            vex_printf("     ");
         }
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}

static void initCounts ( void )
{
   Int op, co;
   initted = True;
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
      tabc_fast[op] = tabc_slow[op] = 0;
      for (co = 0; co < 16; co++)
         tab_cond[op][co] = 0;
   }
}

#endif /* PROFILE_EFLAGS */

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters.
   Worker function, not directly called from generated code. */
static
UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
                                     UInt cc_dep1_formal,
                                     UInt cc_dep2_formal,
                                     UInt cc_ndep_formal )
{
   switch (cc_op) {
      case X86G_CC_OP_COPY:
         return cc_dep1_formal
                & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                   | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);

      case X86G_CC_OP_ADDB:   ACTIONS_ADD(  8, UChar  );
      case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );

      case X86G_CC_OP_ADCB:   ACTIONS_ADC(  8, UChar  );
      case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );

      case X86G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );

      case X86G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );

      case X86G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );

      case X86G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );

      case X86G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );

      case X86G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );

      case X86G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );

      case X86G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );

      case X86G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );

      case X86G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                UShort, toUShort );
      case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                UInt,   toUInt );
      case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                ULong,  idULong );

      case X86G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                Short,  toUShort );
      case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                Int,    toUInt );
      case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                Long,   idULong );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_eflags_all_WRK(X86)"
                    "( %u, 0x%x, 0x%x, 0x%x )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("x86g_calculate_eflags_all_WRK(X86)");
   }
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters. */
UInt x86g_calculate_eflags_all ( UInt cc_op,
                                 UInt cc_dep1,
                                 UInt cc_dep2,
                                 UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_all++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif
   return
      x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters. */
VEX_REGPARM(3)
UInt x86g_calculate_eflags_c ( UInt cc_op,
                               UInt cc_dep1,
                               UInt cc_dep2,
                               UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_c++;
   tabc_fast[cc_op]++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   /* Fast-case some common ones. */
   switch (cc_op) {
      case X86G_CC_OP_LOGICL:
      case X86G_CC_OP_LOGICW:
      case X86G_CC_OP_LOGICB:
         return 0;
      case X86G_CC_OP_SUBL:
         return ((UInt)cc_dep1) < ((UInt)cc_dep2)
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBW:
         return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBB:
         return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_INCL:
      case X86G_CC_OP_DECL:
         return cc_ndep & X86G_CC_MASK_C;
      default:
         break;
   }

#  if PROFILE_EFLAGS
   tabc_fast[cc_op]--;
   tabc_slow[cc_op]++;
#  endif

   return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
          & X86G_CC_MASK_C;
}
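
/* Note (illustrative): the fast cases above cover the thunk states
   that compiled code hits most often; everything else falls through to
   the full computation, keeping just the C bit. */
#if 0   /* illustrative only: CF after `cmpl $5, %eax' with %eax == 3 */
UInt c = x86g_calculate_eflags_c(X86G_CC_OP_SUBL, 3, 5, 0);
/* expected: c == X86G_CC_MASK_C, via the SUBL fast path (3 <u 5) */
#endif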

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* returns 1 or 0 */
UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
                                UInt cc_op,
                                UInt cc_dep1,
                                UInt cc_dep2,
                                UInt cc_ndep )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
                                               cc_dep2, cc_ndep);
   UInt of,sf,zf,cf,pf;
   UInt inv = cond & 1;

#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   tab_cond[cc_op][cond]++;
   n_calc_cond++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   switch (cond) {
      case X86CondNO:
      case X86CondO: /* OF == 1 */
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ of);

      case X86CondNZ:
      case X86CondZ: /* ZF == 1 */
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ zf);

      case X86CondNB:
      case X86CondB: /* CF == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         return 1 & (inv ^ cf);
         break;

      case X86CondNBE:
      case X86CondBE: /* (CF or ZF) == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ (cf | zf));
         break;

      case X86CondNS:
      case X86CondS: /* SF == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         return 1 & (inv ^ sf);

      case X86CondNP:
      case X86CondP: /* PF == 1 */
         pf = eflags >> X86G_CC_SHIFT_P;
         return 1 & (inv ^ pf);

      case X86CondNL:
      case X86CondL: /* (SF xor OF) == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ (sf ^ of));
         break;

      case X86CondNLE:
      case X86CondLE: /* ((SF xor OF) or ZF) == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ ((sf ^ of) | zf));
         break;

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
         vpanic("x86g_calculate_condition");
   }
}
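
/* Note (illustrative): x86 condition codes come in even/odd pairs in
   which the odd member is the negation of the even one (e.g.
   X86CondZ/X86CondNZ), so `inv = cond & 1' above folds both members of
   each pair into a single flag test. */
#if 0   /* illustrative only: Z vs NZ on a zero SUBL result */
UInt z  = x86g_calculate_condition(X86CondZ,  X86G_CC_OP_SUBL, 7, 7, 0);
UInt nz = x86g_calculate_condition(X86CondNZ, X86G_CC_OP_SUBL, 7, 7, 0);
/* expected: z == 1, nz == 0 */
#endif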

/* VISIBLE TO LIBVEX CLIENT */
UInt LibVEX_GuestX86_get_eflags ( /*IN*/const VexGuestX86State* vex_state )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   UInt dflag = vex_state->guest_DFLAG;
   vassert(dflag == 1 || dflag == 0xFFFFFFFF);
   if (dflag == 0xFFFFFFFF)
      eflags |= X86G_CC_MASK_D;
   if (vex_state->guest_IDFLAG == 1)
      eflags |= X86G_CC_MASK_ID;
   if (vex_state->guest_ACFLAG == 1)
      eflags |= X86G_CC_MASK_AC;

   return eflags;
}

/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestX86_put_eflags ( UInt eflags,
                             /*MOD*/VexGuestX86State* vex_state )
{
   /* D flag */
   if (eflags & X86G_CC_MASK_D) {
      vex_state->guest_DFLAG = 0xFFFFFFFF;
      eflags &= ~X86G_CC_MASK_D;
   }
   else
      vex_state->guest_DFLAG = 1;

   /* ID flag */
   if (eflags & X86G_CC_MASK_ID) {
      vex_state->guest_IDFLAG = 1;
      eflags &= ~X86G_CC_MASK_ID;
   }
   else
      vex_state->guest_IDFLAG = 0;

   /* AC flag */
   if (eflags & X86G_CC_MASK_AC) {
      vex_state->guest_ACFLAG = 1;
      eflags &= ~X86G_CC_MASK_AC;
   }
   else
      vex_state->guest_ACFLAG = 0;

   UInt cc_mask = X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z |
                  X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P;
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = eflags & cc_mask;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}
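
/* Usage sketch (illustrative only): a client can round-trip the flags
   through the two calls above; after a put, the thunk is in COPY form
   with the OSZACP bits stored directly in CC_DEP1. */
#if 0
VexGuestX86State st;   /* assume initialised elsewhere */
UInt ef = LibVEX_GuestX86_get_eflags(&st);
LibVEX_GuestX86_put_eflags(ef | X86G_CC_MASK_C, &st);
/* CF now reads back as 1 via x86g_calculate_eflags_c. */
#endif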

/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
                              /*MOD*/VexGuestX86State* vex_state )
{
   UInt oszacp = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   if (new_carry_flag & 1) {
      oszacp |= X86G_CC_MASK_C;
   } else {
      oszacp &= ~X86G_CC_MASK_C;
   }
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = oszacp;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}

/*---------------------------------------------------------------*/
/*--- %eflags translation-time function specialisers.         ---*/
/*--- These help iropt specialise calls to the above run-time ---*/
/*--- %eflags functions.                                      ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */

static inline Bool isU32 ( IRExpr* e, UInt n )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == n );
}

IRExpr* guest_x86_spechelper ( const HChar* function_name,
                               IRExpr** args,
                               IRStmt** precedingStmts,
                               Int      n_precedingStmts )
{
#  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))

   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf("  ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif

   /* --------- specialising "x86g_calculate_condition" --------- */

   if (vex_streq(function_name, "x86g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
         /* long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }
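
      /* As an illustrative sketch: the rewrite above turns the IR

            t = CALL x86g_calculate_condition(X86CondZ, X86G_CC_OP_ADDL,
                                              dep1, dep2, ndep)

         into the call-free

            t = 1Uto32(CmpEQ32(Add32(dep1, dep2), 0x0:I32))

         which iropt can fold further and memcheck can instrument
         precisely.  The remaining cases below all follow this
         pattern. */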

      /*---------------- SUBL ----------------*/

      /* 4, 5 */
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, cc_dep1, cc_dep2));
      }

      /* 12, 13 */
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
         /* long sub/cmp, then NL (signed greater than or equal)
            --> test !(dst <s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      /* 14, 15 */
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
         /* long sub/cmp, then NLE (signed not less than or equal)
            --> test dst >s src
            --> test !(dst <=s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      /* 6, 7 */
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test !(dst <=u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      /* 2, 3 */
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
         /* long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
         /* long sub/cmp, then NB (unsigned greater than or equal)
            --> test !(dst <u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      /* 8, 9 */
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)
          && isU32(cc_dep2, 0)) {
         /* long sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (UInt)dst[31] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)
          && isU32(cc_dep2, 0)) {
         /* long sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (UInt) !dst[31] */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(31)),
                            mkU32(1)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S,
                           binop(Iop_Sub32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
         /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S,
                                 binop(Iop_Sub32, cc_dep1, cc_dep2),
                                 mkU32(0))),
                      mkU32(1));
      }

      /*---------------- SUBW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }

      /*---------------- SUBB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
         /* byte sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep2,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep1,mkU32(0xFF))));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
          && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (UInt)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU32(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U32s. */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
          && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (UInt) !dst[7]
         */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                      mkU32(1));
      }

      /*---------------- LOGICL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
         /* long and/or/xor, then BE
            LOGIC sets ZF according to the result and makes CF be zero.
            BE computes (CF | ZF), but CF is zero, so this reduces to ZF
            -- which will be 1 iff the result is zero.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* long and/or/xor, then S --> (UInt)result[31] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
         /* see comment below for (LOGICB, CondNS) */
         /* long and/or/xor, then NS --> (UInt) ~ result[31] */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(31)),
                            mkU32(1)),
                      mkU32(1));
      }

      /*---------------- LOGICW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
         /* word and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
                           mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* word and/or/xor, then S --> (UInt)result[15] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(15)),
                      mkU32(1));
      }

      /*---------------- LOGICB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
                           mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         /* b9ac9:       84 c0                   test   %al,%al
            b9acb:       75 0d                   jne    b9ada */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
                           mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
         /* ditto, for negation-of-S. */
         /* byte and/or/xor, then NS --> (UInt) ~ result[7] */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                      mkU32(1));
      }

      /*---------------- DECL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
         /* dec L, then S --> compare DST <s 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
      }

      /*---------------- DECW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
         /* dec W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- INCW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
         /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
         /* inc W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- SHRL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
         /* SHRL, then Z --> test dep1(result) == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondNZ)) {
         /* SHRL, then NZ --> test dep1(result) != 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
      }

      /*---------------- SHLL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SHLL) && isU32(cond, X86CondZ)) {
         /* SHLL, then Z --> test dep1(result) == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      //if (isU32(cc_op, X86G_CC_OP_SHLL) && isU32(cond, X86CondNZ)) {
      //   /* SHLL, then NZ --> test dep1(result) != 0 */
      //   vassert(0); // No test case yet observed
      //}

      /*---------------- COPY ----------------*/
      /* This can happen, as a result of x87 FP compares: "fcom ... ;
         fnstsw %ax ; sahf ; jbe" for example. */

      if (isU32(cc_op, X86G_CC_OP_COPY) &&
          (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test
            (C or Z) == 1. */
         /* COPY, then NBE --> extract C and Z from dep1, and test
            (C or Z) == 0. */
         UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(
                        Iop_Or32,
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
                     ),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
         /* COPY, then NB --> extract C from dep1, and test (C == 0). */
         UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         /* COPY, then NP --> extract P from dep1, and test (P == 0). */
         UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_c" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
      /* specialise calls to above "calculate_eflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, X86G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep1,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep2,mkU32(0xFF))));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL)
          || isU32(cc_op, X86G_CC_OP_LOGICW)
          || isU32(cc_op, X86G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU32(0);
      }
      if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return cc_ndep;
      }
      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* cflag after COPY is stored in DEP1. */
         return
            binop(
               Iop_And32,
               binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
               mkU32(1)
            );
      }
      if (isU32(cc_op, X86G_CC_OP_ADDL)) {
         /* C after add denotes sum <u either arg */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           cc_dep1));
      }
      // ATC, requires verification, no test case known
      //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
      //   /* C after signed widening multiply denotes the case where
      //      the top half of the result isn't simply the sign extension
      //      of the bottom half (iow the result doesn't fit completely
      //      in the bottom half).  Hence:
      //        C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
      //      where 'x' denotes signed widening multiply.*/
      //   return
      //      unop(Iop_1Uto32,
      //           binop(Iop_CmpNE32,
      //                 unop(Iop_64HIto32,
      //                      binop(Iop_MullS32, cc_dep1, cc_dep2)),
      //                 binop(Iop_Sar32,
      //                       binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
      //}

#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }

   /* --------- specialising "x86g_calculate_eflags_all" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
      /* specialise calls to above "calculate_eflags_all" function */
      IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      /* cc_dep2 = args[2]; */
      /* cc_ndep = args[3]; */

      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* eflags after COPY are stored in DEP1. */
         return
            binop(
               Iop_And32,
               cc_dep1,
               mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                     | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
            );
      }
      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU32
#  undef mkU8

   return NULL;
}

/*---------------------------------------------------------------*/
/*--- Supporting functions for x87 FPU activities.            ---*/
/*---------------------------------------------------------------*/

static inline Bool host_is_little_endian ( void )
{
   UInt x = 0x76543210;
   UChar* p = (UChar*)(&x);
   return toBool(*p == 0x10);
}

/* 80 and 64-bit floating point formats:

   80-bit:

    S  0       0-------0      zero
    S  0       0X------X      denormals
    S  1-7FFE  1X------X      normals (all normals have leading 1)
    S  7FFF    10------0      infinity
    S  7FFF    10X-----X      snan
    S  7FFF    11X-----X      qnan

   S is the sign bit.  For runs X----X, at least one of the Xs must be
   nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
   there is an explicitly represented leading 1, and a sign bit,
   giving 80 in total.

   64-bit avoids the confusion of an explicitly represented leading 1
   and so is simpler:

    S  0      0------0   zero
    S  0      X------X   denormals
    S  1-7FE  any        normals
    S  7FF    0------0   infinity
    S  7FF    0X-----X   snan
    S  7FF    1X-----X   qnan

   Exponent is 11 bits, fractional part is 52 bits, and there is a
   sign bit, giving 64 in total.
*/
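
/* Sketch (illustrative only): how the 64-bit fields described above
   map onto the bytes used by x86g_calculate_FXAM below, assuming a
   little-endian host. */
#if 0
ULong  d;                     /* the IEEE754 double under test */
UChar* b    = (UChar*)&d;
UInt   sign = (b[7] >> 7) & 1;                              /* S */
UInt   bexp = ((b[7] << 4) | ((b[6] >> 4) & 0xF)) & 0x7FF;  /* 11-bit exp */
/* mantissa: the low 4 bits of b[6] plus all of b[5]..b[0] (52 bits) */
#endif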

/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
{
   Bool   mantissaIsZero;
   Int    bexp;
   UChar  sign;
   UChar* f64;

   vassert(host_is_little_endian());

   /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */

   f64  = (UChar*)(&dbl);
   sign = toUChar( (f64[7] >> 7) & 1 );

   /* First off, if the tag indicates the register was empty,
      return 1,0,sign,1 */
   if (tag == 0) {
      /* vex_printf("Empty\n"); */
      return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
                             | X86G_FC_MASK_C0;
   }

   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   bexp &= 0x7FF;

   mantissaIsZero
      = toBool(
           (f64[6] & 0x0F) == 0
           && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
        );

   /* If both exponent and mantissa are zero, the value is zero.
      Return 1,0,sign,0. */
   if (bexp == 0 && mantissaIsZero) {
      /* vex_printf("Zero\n"); */
      return X86G_FC_MASK_C3 | 0
                             | (sign << X86G_FC_SHIFT_C1) | 0;
   }

   /* If exponent is zero but mantissa isn't, it's a denormal.
      Return 1,1,sign,0. */
   if (bexp == 0 && !mantissaIsZero) {
      /* vex_printf("Denormal\n"); */
      return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
                             | (sign << X86G_FC_SHIFT_C1) | 0;
   }

   /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
      Return 0,1,sign,1. */
   if (bexp == 0x7FF && mantissaIsZero) {
      /* vex_printf("Inf\n"); */
      return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
               | X86G_FC_MASK_C0;
   }

   /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
      Return 0,0,sign,1. */
   if (bexp == 0x7FF && !mantissaIsZero) {
      /* vex_printf("NaN\n"); */
      return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
   }

   /* Uh, ok, we give up.  It must be a normal finite number.
      Return 0,1,sign,0.
   */
   /* vex_printf("normal\n"); */
   return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest memory) */
ULong x86g_dirtyhelper_loadF80le ( Addr addrU )
{
   ULong f64;
   convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
   return f64;
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest memory) */
void x86g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
{
   convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
}

/*----------------------------------------------*/
/*--- The exported fns ..                    ---*/
/*----------------------------------------------*/

/* Layout of the real x87 state. */
/* 13 June 05: Fpu_State and auxiliary constants were moved to
   g_generic_x87.h */

/* CLEAN HELPER */
/* fpucw[15:0] contains an x87 native format FPU control word.
   Extract from it the required FPROUND value and any resulting
   emulation warning, and return (warn << 32) | fpround value.
*/
ULong x86g_check_fldcw ( UInt fpucw )
{
   /* Decide on a rounding mode.  fpucw[11:10] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   UInt rmode = (fpucw >> 10) & 3;

   /* Detect any required emulation warnings. */
   VexEmNote ew = EmNote_NONE;

   if ((fpucw & 0x3F) != 0x3F) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_x87exns;
   }
   else
   if (((fpucw >> 8) & 3) != 3) {
      /* unsupported precision */
      ew = EmWarn_X86_x87precision;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}
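
/* Example (illustrative only): the power-on control word 0x037F has
   all exception bits masked (low 6 bits set), precision control 11
   (extended) and rounding control 00, so this should return
   (EmNote_NONE << 32) | 0, i.e. round-to-nearest with no warning,
   given the IRRoundingMode encoding noted above. */
#if 0
ULong r = x86g_check_fldcw(0x037F);
/* expected: (VexEmNote)(r >> 32) == EmNote_NONE && (UInt)r == 0 */
#endif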

/* CLEAN HELPER */
/* Given fpround as an IRRoundingMode value, create a suitable x87
   native format FPU control word. */
UInt x86g_create_fpucw ( UInt fpround )
{
   fpround &= 3;
   return 0x037F | (fpround << 10);
}

/* CLEAN HELPER */
/* mxcsr[15:0] contains an SSE native format MXCSR value.
   Extract from it the required SSEROUND value and any resulting
   emulation warning, and return (warn << 32) | sseround value.
*/
ULong x86g_check_ldmxcsr ( UInt mxcsr )
{
   /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   UInt rmode = (mxcsr >> 13) & 3;

   /* Detect any required emulation warnings. */
   VexEmNote ew = EmNote_NONE;

   if ((mxcsr & 0x1F80) != 0x1F80) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_sseExns;
   }
   else
   if (mxcsr & (1<<15)) {
      /* FZ is set */
      ew = EmWarn_X86_fz;
   }
   else
   if (mxcsr & (1<<6)) {
      /* DAZ is set */
      ew = EmWarn_X86_daz;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}

/* CLEAN HELPER */
/* Given sseround as an IRRoundingMode value, create a suitable SSE
   native format MXCSR value. */
UInt x86g_create_mxcsr ( UInt sseround )
{
   sseround &= 3;
   return 0x1F80 | (sseround << 13);
}
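
/* Round-trip note (illustrative only): x86g_check_ldmxcsr applied to
   x86g_create_mxcsr(r) should give back r with no emulation warning,
   since 0x1F80 masks all exceptions and leaves FZ and DAZ clear. */
#if 0
ULong r = x86g_check_ldmxcsr( x86g_create_mxcsr(2) );
/* expected: (UInt)r == 2 && (VexEmNote)(r >> 32) == EmNote_NONE */
#endif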

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state) */
/* Initialise the x87 FPU state as per 'finit'. */
void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
{
   Int i;
   gst->guest_FTOP = 0;
   for (i = 0; i < 8; i++) {
      gst->guest_FPTAG[i] = 0; /* empty */
      gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
   }
   gst->guest_FPROUND = (UInt)Irrm_NEAREST;
   gst->guest_FC3210  = 0;
}

/* This is used to implement both 'frstor' and 'fldenv'.  The latter
   appears to differ from the former only in that the 8 FP registers
   themselves are not transferred into the guest state. */
static
VexEmNote do_put_x87 ( Bool moveRegs,
                       /*IN*/Fpu_State* x87_state,
                       /*OUT*/VexGuestX86State* vex_state )
{
   Int       stno, preg;
   UInt      tag;
   ULong*    vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*    vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   UInt      ftop    = (x87_state->env[FP_ENV_STAT] >> 11) & 7;
   UInt      tagw    = x87_state->env[FP_ENV_TAG];
   UInt      fpucw   = x87_state->env[FP_ENV_CTRL];
   UInt      c3210   = x87_state->env[FP_ENV_STAT] & 0x4700;
   VexEmNote ew;
   UInt      fpround;
   ULong     pair;

   /* Copy registers and tags */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?  Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         if (moveRegs)
            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         if (moveRegs)
            convert_f80le_to_f64le( &x87_state->reg[10*stno],
                                    (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings. */
   pair    = x86g_check_fldcw ( (UInt)fpucw );
   fpround = (UInt)pair;
   ew      = (VexEmNote)(pair >> 32);

   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}

/* Create an x87 FPU state from the guest state, as close as
   we can approximate it. */
static
void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
                  /*OUT*/Fpu_State* x87_state )
{
   Int    i, stno, preg;
   UInt   tagw;
   ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   UInt   ftop    = vex_state->guest_FTOP;
   UInt   c3210   = vex_state->guest_FC3210;

   for (i = 0; i < 14; i++)
      x87_state->env[i] = 0;

   x87_state->env[1] = x87_state->env[3] = x87_state->env[5]
                     = x87_state->env[13] = 0xFFFF;
   x87_state->env[FP_ENV_STAT]
      = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   x87_state->env[FP_ENV_CTRL]
      = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));

   /* Dump the register stack in ST order. */
   tagw = 0;
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      if (vexTags[preg] == 0) {
         /* register is empty */
         tagw |= (3 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87_state->reg[10*stno] );
      } else {
         /* register is full. */
         tagw |= (0 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87_state->reg[10*stno] );
      }
   }
   x87_state->env[FP_ENV_TAG] = toUShort(tagw);
}
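
/* Note on the tag word built above (illustrative): the x87 tag word
   holds a 2-bit tag per physical register, with 0 meaning in-use (as
   used here) and 3 meaning empty; `tagw |= (0 << (2*preg))' is a no-op
   kept only for symmetry with the empty case.  For example, ftop == 6
   with only ST(0) in use would give tagw == 0xCFFF. */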

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
{
   /* Somewhat roundabout, but at least it's simple. */
   Fpu_State tmp;
   UShort*   addrS = (UShort*)addr;
   UChar*    addrC = (UChar*)addr;
   U128*     xmm   = (U128*)(addr + 160);
   UInt      mxcsr;
   UShort    fp_tags;
   UInt      summary_tags;
   Int       r, stno;
   UShort    *srcS, *dstS;

   do_get_x87( gst, &tmp );
   mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );

   /* Now build the proper fxsave image from the x87 image we just
      made. */

   addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */

   /* set addrS[2] in an endian-independent way */
   summary_tags = 0;
   fp_tags = tmp.env[FP_ENV_TAG];
   for (r = 0; r < 8; r++) {
      if ( ((fp_tags >> (2*r)) & 3) != 3 )
         summary_tags |= (1 << r);
   }
   addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
   addrC[5]  = 0; /* pad */

   addrS[3]  = 0; /* FOP: fpu opcode (bogus) */
   addrS[4]  = 0;
   addrS[5]  = 0; /* FPU IP (bogus) */
   addrS[6]  = 0; /* FPU IP's segment selector (bogus) (although we
                     could conceivably dump %CS here) */

   addrS[7]  = 0; /* Intel reserved */

   addrS[8]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[9]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[10] = 0; /* segment selector for above operand pointer; %DS
                     perhaps? */
   addrS[11] = 0; /* Intel reserved */

   addrS[12] = toUShort(mxcsr);  /* MXCSR */
   addrS[13] = toUShort(mxcsr >> 16);

   addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
   addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */

   /* Copy in the FP registers, in ST order. */
   for (stno = 0; stno < 8; stno++) {
      srcS = (UShort*)(&tmp.reg[10*stno]);
      dstS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
      dstS[5] = 0;
      dstS[6] = 0;
      dstS[7] = 0;
   }

   /* That's the first 160 bytes of the image done.  Now only %xmm0
      .. %xmm7 remain to be copied.  If the host is big-endian, these
      need to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( xmm[0], gst->guest_XMM0 );
   COPY_U128( xmm[1], gst->guest_XMM1 );
   COPY_U128( xmm[2], gst->guest_XMM2 );
   COPY_U128( xmm[3], gst->guest_XMM3 );
   COPY_U128( xmm[4], gst->guest_XMM4 );
   COPY_U128( xmm[5], gst->guest_XMM5 );
   COPY_U128( xmm[6], gst->guest_XMM6 );
   COPY_U128( xmm[7], gst->guest_XMM7 );

#  undef COPY_U128
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
{
   Fpu_State tmp;
   VexEmNote warnX87 = EmNote_NONE;
   VexEmNote warnXMM = EmNote_NONE;
   UShort*   addrS   = (UShort*)addr;
   UChar*    addrC   = (UChar*)addr;
   U128*     xmm     = (U128*)(addr + 160);
   UShort    fp_tags;
   Int       r, stno, i;

   /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
      to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( gst->guest_XMM0, xmm[0] );
   COPY_U128( gst->guest_XMM1, xmm[1] );
   COPY_U128( gst->guest_XMM2, xmm[2] );
   COPY_U128( gst->guest_XMM3, xmm[3] );
   COPY_U128( gst->guest_XMM4, xmm[4] );
   COPY_U128( gst->guest_XMM5, xmm[5] );
   COPY_U128( gst->guest_XMM6, xmm[6] );
   COPY_U128( gst->guest_XMM7, xmm[7] );

#  undef COPY_U128

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */

   /* LLVM on Darwin turns the following loop into a movaps plus a
      handful of scalar stores.  This would work fine except for the
      fact that VEX doesn't keep the stack correctly (16-) aligned for
      the call, so it segfaults.  Hence, split the loop into two
      pieces (and pray LLVM doesn't merely glue them back together) so
      it's composed only of scalar stores and so is alignment
      insensitive.  Of course this is a kludge of the lamest kind --
      VEX should be fixed properly. */
   /* Code that seems to trigger the problem:
      for (i = 0; i < 14; i++) tmp.env[i] = 0; */
   for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
   __asm__ __volatile__("" ::: "memory");
   for (i = 0; i < 7; i++) tmp.env[i+7] = 0;

   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..7] */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */

   /* Rebuild the full 2-bit-per-register tag word from the abridged
      FTW byte: a set bit means the register is in use (tag 00), a
      clear bit means it is empty (tag 11). */
   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
      else
         fp_tags |= (3 << (2*r)); /* EMPTY */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state. */
   warnX87 = do_put_x87( True/*moveRegs*/, &tmp, gst );

   { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
                | ((((UInt)addrS[13]) & 0xFFFF) << 16);
     ULong w64 = x86g_check_ldmxcsr( w32 );

     warnXMM = (VexEmNote)(w64 >> 32);

     gst->guest_SSEROUND = w64 & 0xFFFFFFFF;
   }

   /* Prefer an X87 emwarn over an XMM one, if both exist. */
   if (warnX87 != EmNote_NONE)
      return warnX87;
   else
      return warnXMM;
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
{
   do_get_x87( gst, (Fpu_State*)addr );
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
{
   return do_put_x87( True/*regs too*/, (Fpu_State*)addr, gst );
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
{
   /* Somewhat roundabout, but at least it's simple. */
   Int       i;
   UShort*   addrP = (UShort*)addr;
   Fpu_State tmp;
   do_get_x87( gst, &tmp );
   for (i = 0; i < 14; i++)
      addrP[i] = tmp.env[i];
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
{
   return do_put_x87( False/*don't move regs*/, (Fpu_State*)addr, gst);
}

/* VISIBLE TO LIBVEX CLIENT */
/* Save the x87 state of the supplied VexGuestX86State structure at
   the given address, which must point to a buffer of at least 108
   bytes. */
void LibVEX_GuestX86_get_x87 ( /*IN*/VexGuestX86State* vex_state,
                               /*OUT*/UChar* x87_state )
{
   do_get_x87 ( vex_state, (Fpu_State*)x87_state );
}

/* VISIBLE TO LIBVEX CLIENT */
/* Restore x87 state from the supplied address into the given
   VexGuestX86State structure. */
VexEmNote LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state,
                                    /*MOD*/VexGuestX86State* vex_state )
{
   return do_put_x87 ( True/*moveRegs*/, (Fpu_State*)x87_state, vex_state );
}

/* VISIBLE TO LIBVEX CLIENT */
/* Return mxcsr from the supplied VexGuestX86State structure. */
UInt LibVEX_GuestX86_get_mxcsr ( /*IN*/VexGuestX86State* vex_state )
{
   return x86g_create_mxcsr ( vex_state->guest_SSEROUND );
}

/* VISIBLE TO LIBVEX CLIENT */
/* Modify the given VexGuestX86State structure according to the passed
   mxcsr value. */
VexEmNote LibVEX_GuestX86_put_mxcsr ( /*IN*/UInt mxcsr,
                                      /*MOD*/VexGuestX86State* vex_state)
{
   ULong w64 = x86g_check_ldmxcsr( mxcsr );
   vex_state->guest_SSEROUND = w64 & 0xFFFFFFFF;
   return (VexEmNote)(w64 >> 32);
}
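
/* A minimal sketch of how a client might round-trip this state,
   using only the four functions above ('gst' is assumed to be a
   client-owned VexGuestX86State; buffer size per the comment on
   LibVEX_GuestX86_get_x87):

      UChar x87buf[108];
      UInt  mxcsr;
      LibVEX_GuestX86_get_x87( &gst, x87buf );
      mxcsr = LibVEX_GuestX86_get_mxcsr( &gst );
      ... gst is clobbered or rebuilt here ...
      (void)LibVEX_GuestX86_put_x87( x87buf, &gst );
      (void)LibVEX_GuestX86_put_mxcsr( mxcsr, &gst );

   The VexEmNote results (ignored above) report any emulation-warning
   conditions arising from the restored values. */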

/*---------------------------------------------------------------*/
/*--- Misc integer helpers, including rotates and CPUID.      ---*/
/*---------------------------------------------------------------*/

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for rotate right
   through the carry bit.  Result in low 32 bits,
   new flags (OSZACP) in high 32 bits.
*/
ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;

   switch (sz) {
      case 4:
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of = ((arg >> 31) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = (arg >> 1) | (cf << 31);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 2:
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of = ((arg >> 15) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7FFF) | (cf << 15);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 1:
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of = ((arg >> 7) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7F) | (cf << 7);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      default:
         vpanic("calculate_RCR: invalid size");
   }

   cf &= 1;
   of &= 1;
   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);

   return (((ULong)eflags_in) << 32) | ((ULong)arg);
}
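
/* A worked example, traced from the code above: a 32-bit RCR by 1 of
   arg = 0x00000001 with CF set on entry, i.e.
   x86g_calculate_RCR(0x1, 1, X86G_CC_MASK_C, 4), shifts the old CF
   into bit 31 and bit 0 into CF.  The value half of the result is
   0x80000000, and the flags half carries CF = 1 and
   OF = (old bit 31) ^ (old CF) = 1. */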

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for rotate left
   through the carry bit.  Result in low 32 bits,
   new flags (OSZACP) in high 32 bits.
*/
ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;

   switch (sz) {
      case 4:
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 31) & 1;
            arg    = (arg << 1) | (cf & 1);
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 31) ^ cf) & 1;
         break;
      case 2:
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 15) & 1;
            arg    = 0xFFFF & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 15) ^ cf) & 1;
         break;
      case 1:
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 7) & 1;
            arg    = 0xFF & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 7) ^ cf) & 1;
         break;
      default:
         vpanic("calculate_RCL: invalid size");
   }

   cf &= 1;
   of &= 1;
   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);

   return (((ULong)eflags_in) << 32) | ((ULong)arg);
}
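
/* The mirror-image example: a 32-bit RCL by 1 of arg = 0x80000000
   with CF clear, i.e. x86g_calculate_RCL(0x80000000, 1, 0, 4), moves
   bit 31 into CF and the old CF into bit 0, giving result 0x00000000
   with CF = 1 and OF = (new bit 31) ^ (new CF) = 1. */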

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for DAA/DAS/AAA/AAS.
   AX value in low half of arg, OSZACP in upper half.
   See guest-x86/toIR.c usage point for details.
*/
static UInt calc_parity_8bit ( UInt w32 ) {
   UInt i;
   UInt p = 1;
   for (i = 0; i < 8; i++)
      p ^= (1 & (w32 >> i));
   return p;
}
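
/* Since p starts at 1 and is toggled once per set bit, the result is
   1 exactly when the low byte of w32 has an even number of set bits,
   matching the x86 PF convention: calc_parity_8bit(0x03) is 1 (two
   bits set), whereas calc_parity_8bit(0x01) is 0. */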
UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
{
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   UInt result = 0;

   switch (opcode) {
      case 0x27: { /* DAA */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_C  = old_C;
            if (r_AL >= 0x100) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL + 0x60;
            r_C  = 1;
         } else {
            r_C = 0;
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x2F: { /* DAS */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            Bool borrow = r_AL < 6;
            r_AL = r_AL - 6;
            r_C  = old_C;
            if (borrow) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL - 0x60;
            r_C  = 1;
         } else {
            /* Intel docs are wrong: r_C = 0; */
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x37: { /* AAA */
         Bool nudge = r_AL > 0xF9;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_AH = r_AH + 1 + (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      case 0x3F: { /* AAS */
         Bool nudge = r_AL < 0x06;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL - 6;
            r_AH = r_AH - 1 - (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      default:
         vassert(0);
   }
   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
            | ( (r_AH & 0xFF) << 8 )
            | ( (r_AL & 0xFF) << 0 );
   return result;
}
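
/* A worked DAA example, traced through the code above: packing
   AH = 0x00, AL = 0x0F with all flags clear gives flags_and_AX =
   0x0000000F, and x86g_calculate_daa_das_aaa_aas(0x0000000F, 0x27)
   takes the first branch (low nibble > 9), yielding AL = 0x15 with
   AF = 1 and CF = 0 -- the usual BCD adjustment of 0x0F. */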

UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
{
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   UInt result = 0;

   switch (opcode) {
      case 0xD4: { /* AAM */
         r_AH = r_AL / 10;
         r_AL = r_AL % 10;
         break;
      }
      case 0xD5: { /* AAD */
         r_AL = ((r_AH * 10) + r_AL) & 0xff;
         r_AH = 0;
         break;
      }
      default:
         vassert(0);
   }

   r_O = 0; /* let's say (undefined) */
   r_C = 0; /* let's say (undefined) */
   r_A = 0; /* let's say (undefined) */
   r_S = (r_AL & 0x80) ? 1 : 0;
   r_Z = (r_AL == 0) ? 1 : 0;
   r_P = calc_parity_8bit( r_AL );

   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
            | ( (r_AH & 0xFF) << 8 )
            | ( (r_AL & 0xFF) << 0 );
   return result;
}
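
/* For instance, AAM on AL = 0x35 (decimal 53), i.e.
   x86g_calculate_aad_aam(0x35, 0xD4), splits it as 53 = 5*10 + 3,
   giving AH = 0x05, AL = 0x03; AAD on that AX value (opcode 0xD5)
   recombines it to AL = 0x35, AH = 0x00.  SF/ZF/PF are recomputed
   from the final AL in both cases. */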

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, return 1. */
ULong x86g_dirtyhelper_RDTSC ( void )
{
#  if defined(__i386__)
   ULong res;
   /* The "=A" constraint binds the 64-bit result to the EDX:EAX
      register pair, which is where rdtsc leaves it. */
   __asm__ __volatile__("rdtsc" : "=A" (res));
   return res;
#  else
   return 1ULL;
#  endif
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be a P55C (Intel Pentium/MMX) */
void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         st->guest_EAX = 0x1;
         st->guest_EBX = 0x756e6547;
         st->guest_ECX = 0x6c65746e;
         st->guest_EDX = 0x49656e69;
         break;
      default:
         st->guest_EAX = 0x543;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x8001bf;
         break;
   }
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be an Athlon "Classic" (Model 2, K75 "Pluto/Orion") */
/* But without 3DNow support (weird, but we really don't support it). */
void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      /* vendor ID */
      case 0:
         st->guest_EAX = 0x1;
         st->guest_EBX = 0x68747541;
         st->guest_ECX = 0x444d4163;
         st->guest_EDX = 0x69746e65;
         break;
      /* feature bits */
      case 1:
         st->guest_EAX = 0x621;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x183f9ff;
         break;
      /* Highest Extended Function Supported (0x80000004 brand string) */
      case 0x80000000:
         st->guest_EAX = 0x80000004;
         st->guest_EBX = 0x68747541;
         st->guest_ECX = 0x444d4163;
         st->guest_EDX = 0x69746e65;
         break;
      /* Extended Processor Info and Feature Bits */
      case 0x80000001:
         st->guest_EAX = 0x721;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
         break;
      /* Processor Brand String "AMD Athlon(tm) Processor" */
      case 0x80000002:
         st->guest_EAX = 0x20444d41;
         st->guest_EBX = 0x6c687441;
         st->guest_ECX = 0x74286e6f;
         st->guest_EDX = 0x5020296d;
         break;
      case 0x80000003:
         st->guest_EAX = 0x65636f72;
         st->guest_EBX = 0x726f7373;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x0;
         break;
      default:
         st->guest_EAX = 0x0;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x0;
         break;
   }
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be the following SSE1-capable CPU:
   vendor_id  : GenuineIntel
   cpu family : 6
   model      : 11
   model name : Intel(R) Pentium(R) III CPU family 1133MHz
   stepping   : 1
   cpu MHz    : 1131.013
   cache size : 512 KB
*/
void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         st->guest_EAX = 0x00000002;
         st->guest_EBX = 0x756e6547;
         st->guest_ECX = 0x6c65746e;
         st->guest_EDX = 0x49656e69;
         break;
      case 1:
         st->guest_EAX = 0x000006b1;
         st->guest_EBX = 0x00000004;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0383fbff;
         break;
      default:
         st->guest_EAX = 0x03020101;
         st->guest_EBX = 0x00000000;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0c040883;
         break;
   }
}

/* Claim to be the following SSE2-capable CPU:
   vendor_id       : GenuineIntel
   cpu family      : 15
   model           : 2
   model name      : Intel(R) Pentium(R) 4 CPU 3.00GHz
   stepping        : 9
   microcode       : 0x17
   cpu MHz         : 2992.577
   cache size      : 512 KB
   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
                     mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2
                     ss ht tm pbe pebs bts cid xtpr
   clflush size    : 64
   cache_alignment : 128
   address sizes   : 36 bits physical, 32 bits virtual
*/
void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         st->guest_EAX = 0x00000002;
         st->guest_EBX = 0x756e6547;
         st->guest_ECX = 0x6c65746e;
         st->guest_EDX = 0x49656e69;
         break;
      case 1:
         st->guest_EAX = 0x00000f29;
         st->guest_EBX = 0x01020809;
         st->guest_ECX = 0x00004400;
         st->guest_EDX = 0xbfebfbff;
         break;
      default:
         st->guest_EAX = 0x03020101;
         st->guest_EBX = 0x00000000;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0c040883;
         break;
   }
}

/* Claim to be the following SSSE3-capable CPU (2 x ...):
   vendor_id       : GenuineIntel
   cpu family      : 6
   model           : 15
   model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
   stepping        : 6
   cpu MHz         : 2394.000
   cache size      : 4096 KB
   physical id     : 0
   siblings        : 2
   core id         : 0
   cpu cores       : 2
   fpu             : yes
   fpu_exception   : yes
   cpuid level     : 10
   wp              : yes
   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
                     mtrr pge mca cmov pat pse36 clflush dts acpi
                     mmx fxsr sse sse2 ss ht tm syscall nx lm
                     constant_tsc pni monitor ds_cpl vmx est tm2
                     cx16 xtpr lahf_lm
   bogomips        : 4798.78
   clflush size    : 64
   cache_alignment : 64
   address sizes   : 36 bits physical, 48 bits virtual
   power management:
*/
void x86g_dirtyhelper_CPUID_sse3 ( VexGuestX86State* st )
{
#  define SET_ABCD(_a,_b,_c,_d)          \
      do { st->guest_EAX = (UInt)(_a);   \
           st->guest_EBX = (UInt)(_b);   \
           st->guest_ECX = (UInt)(_c);   \
           st->guest_EDX = (UInt)(_d);   \
      } while (0)

   switch (st->guest_EAX) {
      case 0x00000000:
         SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
         break;
      case 0x00000001:
         SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
         break;
      case 0x00000002:
         SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
         break;
      case 0x00000003:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000004: {
         switch (st->guest_ECX) {
            case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
                                      0x00000fff, 0x00000001); break;
            default:         SET_ABCD(0x00000000, 0x00000000,
                                      0x00000000, 0x00000000); break;
         }
         break;
      }
      case 0x00000005:
         SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
         break;
      case 0x00000006:
         SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
         break;
      case 0x00000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000008:
         SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000009:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x0000000a:
      unhandled_eax_value:
         SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000000:
         SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000001:
         SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
         break;
      case 0x80000002:
         SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
         break;
      case 0x80000003:
         SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
         break;
      case 0x80000004:
         SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
         break;
      case 0x80000005:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000006:
         SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
         break;
      case 0x80000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000008:
         SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
         break;
      default:
         goto unhandled_eax_value;
   }
#  undef SET_ABCD
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, return 0. */
UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
{
#  if defined(__i386__)
   UInt r = 0;
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
                              : "=a" (r) : "Nd" (portno));
         break;
      default:
         break;
   }
   return r;
#  else
   return 0;
#  endif
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, do nothing. */
void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
{
#  if defined(__i386__)
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("outl %0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("outw %w0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("outb %b0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      default:
         break;
   }
#  else
   /* do nothing */
#  endif
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, write a zeroed-out
   descriptor. */
/* op = 0: call the native SGDT instruction.
   op = 1: call the native SIDT instruction.
*/
void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
#  if defined(__i386__)
   switch (op) {
      case 0:
         __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
         break;
      case 1:
         __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
         break;
      default:
         vpanic("x86g_dirtyhelper_SxDT");
   }
#  else
   /* Not on x86: fake it by writing a zeroed 6-byte
      pseudo-descriptor. */
   UChar* p = (UChar*)address;
   p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
#  endif
}

/*---------------------------------------------------------------*/
/*--- Helpers for MMX/SSE/SSE2.                               ---*/
/*---------------------------------------------------------------*/

static inline UChar abdU8 ( UChar xx, UChar yy ) {
   return toUChar(xx>yy ? xx-yy : yy-xx);
}

static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
   return (((ULong)w1) << 32) | ((ULong)w0);
}

static inline UShort sel16x4_3 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(hi32 >> 16);
}
static inline UShort sel16x4_2 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(hi32);
}
static inline UShort sel16x4_1 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUShort(lo32 >> 16);
}
static inline UShort sel16x4_0 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUShort(lo32);
}

static inline UChar sel8x8_7 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 24);
}
static inline UChar sel8x8_6 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 16);
}
static inline UChar sel8x8_5 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 8);
}
static inline UChar sel8x8_4 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 0);
}
static inline UChar sel8x8_3 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 24);
}
static inline UChar sel8x8_2 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 16);
}
static inline UChar sel8x8_1 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 8);
}
static inline UChar sel8x8_0 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 0);
}

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
{
   return
      mk32x2(
         (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
            + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
         (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
            + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
      );
}
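
/* That is, each 64-bit operand is treated as four signed 16-bit
   lanes, and adjacent lane products are summed into two 32-bit
   results.  For example, xx = 0x0002000300040005 and
   yy = 0x0001000100010001 give mk32x2(2*1 + 3*1, 4*1 + 5*1)
   = 0x0000000500000009. */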

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
{
   UInt t = 0;
   t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
   t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
   t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
   t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
   t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
   t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
   t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
   t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
   t &= 0xFFFF;
   return (ULong)t;
}
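
/* In other words: the sum of absolute differences of the eight
   unsigned byte lanes, returned in the low 16 bits.  For example,
   xx = 0x0102030405060708 against yy = 0 gives 1+2+...+8 = 36
   = 0x24. */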

/*---------------------------------------------------------------*/
/*--- Helpers for dealing with segment overrides.             ---*/
/*---------------------------------------------------------------*/

static inline
UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
{
   UInt lo  = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
   UInt mid =   0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
   UInt hi  =   0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
   return (hi << 24) | (mid << 16) | lo;
}

static inline
UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
{
   UInt lo    = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
   UInt hi    =    0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
   UInt limit = (hi << 16) | lo;
   if (ent->LdtEnt.Bits.Granularity)
      limit = (limit << 12) | 0xFFF;
   return limit;
}
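
/* So a descriptor with LimitLow = 0xFFFF, LimitHi = 0xF and the
   Granularity bit set yields (0xFFFFF << 12) | 0xFFF = 0xFFFFFFFF,
   i.e. a full 4GB segment measured in 4K pages. */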

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
                              UInt seg_selector, UInt virtual_addr )
{
   UInt tiBit, base, limit;
   VexGuestX86SegDescr* the_descrs;

   Bool verboze = False;

   /* If this isn't true, we're in Big Trouble. */
   vassert(8 == sizeof(VexGuestX86SegDescr));

   if (verboze) {
      // Coverity is right but this is unimportant
      // coverity[DEADCODE:FALSE]
      vex_printf("x86g_use_seg_selector: "
                 "seg_selector = 0x%x, vaddr = 0x%x\n",
                 seg_selector, virtual_addr);
   }

   /* Check for wildly invalid selector. */
   if (seg_selector & ~0xFFFF)
      goto bad;

   seg_selector &= 0x0000FFFF;

   /* Sanity check the segment selector.  Ensure that RPL=11b (least
      privilege).  This forms the bottom 2 bits of the selector. */
   if ((seg_selector & 3) != 3)
      goto bad;

   /* Extract the TI bit (0 means GDT, 1 means LDT) */
   tiBit = (seg_selector >> 2) & 1;

   /* Convert the segment selector into a table index */
   seg_selector >>= 3;
   vassert(seg_selector < VEX_GUEST_X86_GDT_NENT);

   if (tiBit == 0) {

      /* GDT access. */
      /* Do we actually have a GDT to look at? */
      if (gdt == 0)
         goto bad;

      the_descrs = (VexGuestX86SegDescr*)gdt;
      base  = get_segdescr_base (&the_descrs[seg_selector]);
      limit = get_segdescr_limit(&the_descrs[seg_selector]);

   } else {

      /* All the same stuff, except for the LDT. */
      if (ldt == 0)
         goto bad;

      if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
         goto bad;

      the_descrs = (VexGuestX86SegDescr*)ldt;
      base  = get_segdescr_base (&the_descrs[seg_selector]);
      limit = get_segdescr_limit(&the_descrs[seg_selector]);

   }

   /* Do the limit check.  Note, this check is just slightly too
      slack.  Really it should be "if (virtual_addr + size - 1 >=
      limit)," but we don't have the size info to hand.  Getting it
      could be significantly complex. */
   if (virtual_addr >= limit)
      goto bad;

   if (verboze)
      vex_printf("x86g_use_seg_selector: "
                 "base = 0x%x, addr = 0x%x\n",
                 base, base + virtual_addr);

   /* High 32 bits are zero, indicating success. */
   return (ULong)( ((UInt)virtual_addr) + base );

  bad:
   return 1ULL << 32;
}
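
/* A caller therefore distinguishes the two outcomes by the upper half
   of the result.  A sketch ('gst' is assumed to hold valid guest_LDT
   and guest_GDT values):

      ULong r = x86g_use_seg_selector( gst.guest_LDT, gst.guest_GDT,
                                       selector, vaddr );
      if (r >> 32) { ... invalid selector: raise a fault ... }
      else         { UInt linear = (UInt)r; ... use it ... }
*/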

/*---------------------------------------------------------------*/
/*--- Helpers for dealing with, and describing,               ---*/
/*--- guest state as a whole.                                 ---*/
/*---------------------------------------------------------------*/

/* Initialise the entire x86 guest state. */
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
{
   vex_state->host_EvC_FAILADDR = 0;
   vex_state->host_EvC_COUNTER = 0;

   vex_state->guest_EAX = 0;
   vex_state->guest_ECX = 0;
   vex_state->guest_EDX = 0;
   vex_state->guest_EBX = 0;
   vex_state->guest_ESP = 0;
   vex_state->guest_EBP = 0;
   vex_state->guest_ESI = 0;
   vex_state->guest_EDI = 0;

   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = 0;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
   vex_state->guest_DFLAG   = 1; /* forwards */
   vex_state->guest_IDFLAG  = 0;
   vex_state->guest_ACFLAG  = 0;

   vex_state->guest_EIP = 0;

   /* Initialise the simulated FPU */
   x86g_dirtyhelper_FINIT( vex_state );

   /* Initialise the SSE state. */
#  define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;

   vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
   SSEZERO(vex_state->guest_XMM0);
   SSEZERO(vex_state->guest_XMM1);
   SSEZERO(vex_state->guest_XMM2);
   SSEZERO(vex_state->guest_XMM3);
   SSEZERO(vex_state->guest_XMM4);
   SSEZERO(vex_state->guest_XMM5);
   SSEZERO(vex_state->guest_XMM6);
   SSEZERO(vex_state->guest_XMM7);

#  undef SSEZERO

   vex_state->guest_CS  = 0;
   vex_state->guest_DS  = 0;
   vex_state->guest_ES  = 0;
   vex_state->guest_FS  = 0;
   vex_state->guest_GS  = 0;
   vex_state->guest_SS  = 0;
   vex_state->guest_LDT = 0;
   vex_state->guest_GDT = 0;

   vex_state->guest_EMNOTE = EmNote_NONE;

   /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
   vex_state->guest_CMSTART = 0;
   vex_state->guest_CMLEN   = 0;

   vex_state->guest_NRADDR   = 0;
   vex_state->guest_SC_CLASS = 0;
   vex_state->guest_IP_AT_SYSCALL = 0;

   vex_state->guest_SETC = 0;

   vex_state->padding1 = 0;
   vex_state->padding2 = 0;
}

/* Figure out if any part of the guest state contained in minoff
   .. maxoff requires precise memory exceptions.  If in doubt return
   True (but this generates significantly slower code).

   By default we enforce precise exns for guest %ESP, %EBP and %EIP
   only.  These are the minimum needed to extract correct stack
   backtraces from x86 code.

   Only %ESP is needed in mode VexRegUpdSpAtMemAccess.
*/
Bool guest_x86_state_requires_precise_mem_exns (
        Int minoff, Int maxoff, VexRegisterUpdates pxControl
     )
{
   Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
   Int ebp_max = ebp_min + 4 - 1;
   Int esp_min = offsetof(VexGuestX86State, guest_ESP);
   Int esp_max = esp_min + 4 - 1;
   Int eip_min = offsetof(VexGuestX86State, guest_EIP);
   Int eip_max = eip_min + 4 - 1;

   if (maxoff < esp_min || minoff > esp_max) {
      /* no overlap with esp */
      if (pxControl == VexRegUpdSpAtMemAccess)
         return False; // We only need to check stack pointer.
   } else {
      return True;
   }

   if (maxoff < ebp_min || minoff > ebp_max) {
      /* no overlap with ebp */
   } else {
      return True;
   }

   if (maxoff < eip_min || minoff > eip_max) {
      /* no overlap with eip */
   } else {
      return True;
   }

   return False;
}
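
/* So, for instance, a query covering only guest_EAX (which overlaps
   none of ESP/EBP/EIP) falls through all three tests and returns
   False, while any range touching guest_ESP returns True
   immediately, whatever pxControl says. */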

#define ALWAYSDEFD(field)                           \
    { offsetof(VexGuestX86State, field),            \
      (sizeof ((VexGuestX86State*)0)->field) }

VexGuestLayout
   x86guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestX86State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestX86State,guest_ESP),
          .sizeof_SP = 4,

          /* Describe the frame pointer. */
          .offset_FP = offsetof(VexGuestX86State,guest_EBP),
          .sizeof_FP = 4,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestX86State,guest_EIP),
          .sizeof_IP = 4,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'. */
          .n_alwaysDefd = 24,

          /* flags thunk: OP and NDEP are always defd, whereas DEP1
             and DEP2 have to be tracked.  See detailed comment in
             gdefs.h on meaning of thunk fields. */
          .alwaysDefd
             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
                 /*  4 */ ALWAYSDEFD(guest_ACFLAG),
                 /*  5 */ ALWAYSDEFD(guest_EIP),
                 /*  6 */ ALWAYSDEFD(guest_FTOP),
                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
                 /*  9 */ ALWAYSDEFD(guest_FC3210),
                 /* 10 */ ALWAYSDEFD(guest_CS),
                 /* 11 */ ALWAYSDEFD(guest_DS),
                 /* 12 */ ALWAYSDEFD(guest_ES),
                 /* 13 */ ALWAYSDEFD(guest_FS),
                 /* 14 */ ALWAYSDEFD(guest_GS),
                 /* 15 */ ALWAYSDEFD(guest_SS),
                 /* 16 */ ALWAYSDEFD(guest_LDT),
                 /* 17 */ ALWAYSDEFD(guest_GDT),
                 /* 18 */ ALWAYSDEFD(guest_EMNOTE),
                 /* 19 */ ALWAYSDEFD(guest_SSEROUND),
                 /* 20 */ ALWAYSDEFD(guest_CMSTART),
                 /* 21 */ ALWAYSDEFD(guest_CMLEN),
                 /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
                 /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
               }
        };

/*---------------------------------------------------------------*/
/*--- end                                 guest_x86_helpers.c ---*/
/*---------------------------------------------------------------*/