/*---------------------------------------------------------------*/
/*--- begin                               guest_x86_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
#include "libvex_basictypes.h"
#include "libvex_emnote.h"
#include "libvex_guest_x86.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_x86_defs.h"
#include "guest_generic_x87.h"
/* This file contains helper functions for x86 guest code.
   Calls to these functions are generated by the back end.
   These calls are of course in the host machine code and
   this file will be compiled to host machine code, so that
   all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change the signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   guest-x86/toIR.c.

   The convention used is that all functions called from generated
   code are named x86g_<something>, and any function whose name lacks
   that prefix is not called from generated code.  Note that some
   LibVEX_* functions can however be called by VEX's client, but that
   is not the same as calling them from VEX-generated code.
*/

/* Set to 1 to get detailed profiling info about use of the flag
   machinery. */
#define PROFILE_EFLAGS 0


/*---------------------------------------------------------------*/
/*--- %eflags run-time helpers.                               ---*/
/*---------------------------------------------------------------*/
static const UChar parity_table[256] = {
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
   0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P
};
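/* Editorial sketch, not part of the original file: parity_table[b]
   should be X86G_CC_MASK_P exactly when b has an even number of set
   bits, which is how x86 defines PF on the low 8 bits of a result.
   For example, 0xA5 = 10100101b has four bits set, so
   parity_table[0xA5] == X86G_CC_MASK_P.  A self-check could look
   like this: */
#if 0
static void check_parity_table ( void )
{
   UInt b, i, nbits;
   for (b = 0; b < 256; b++) {
      /* count the set bits of b */
      for (nbits = 0, i = 0; i < 8; i++)
         nbits += (b >> i) & 1;
      vassert(parity_table[b] == ((nbits & 1) ? 0 : X86G_CC_MASK_P));
   }
}
#endif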
/* generalised left-shifter */
inline static Int lshift ( Int x, Int n )
{
   if (n >= 0)
      return (UInt)x << n;
   else
      return x >> (-n);
}

/* identity on ULong */
static inline ULong idULong ( ULong x )
{
   return x;
}
#define PREAMBLE(__data_bits)                                   \
   /* const */ UInt DATA_MASK                                   \
      = __data_bits==8 ? 0xFF                                   \
                       : (__data_bits==16 ? 0xFFFF              \
                                          : 0xFFFFFFFF);        \
   /* const */ UInt SIGN_MASK = 1u << (__data_bits - 1);        \
   /* const */ UInt CC_DEP1 = cc_dep1_formal;                   \
   /* const */ UInt CC_DEP2 = cc_dep2_formal;                   \
   /* const */ UInt CC_NDEP = cc_ndep_formal;                   \
   /* Four bogus assignments, which hopefully gcc can */        \
   /* optimise away, and which stop it complaining about */     \
   /* unused variables. */                                      \
   SIGN_MASK = SIGN_MASK;                                       \
   DATA_MASK = DATA_MASK;                                       \
   CC_DEP2   = CC_DEP2;                                         \
   CC_NDEP   = CC_NDEP;
/*-------------------------------------------------------------*/

#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL + argR;                                        \
     cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                   \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
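/* Editorial worked example, not original text: for an 8-bit add of
   0x7F + 0x01, CC_DEP1 = 0x7F, CC_DEP2 = 0x01, so res = 0x80.  Then
   cf = ((UChar)0x80 < (UChar)0x7F) = 0,
   af = (0x80 ^ 0x7F ^ 0x01) & 0x10 = 0x10 (set),
   sf = 0x80 (bit 7 of the result), and of is set, since argL and
   argR agree in sign while res differs -- exactly the x86 rule
   OF = (~(argL^argR) & (argL^res)) at the msb, relocated to bit 11
   by the "12 - DATA_BITS" shift. */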
/*-------------------------------------------------------------*/

#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2;                                            \
     res  = argL - argR;                                        \
     cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;                  \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, oldC, res;                                \
     oldC = CC_NDEP & X86G_CC_MASK_C;                           \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL + argR) + oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL;               \
     else                                                       \
        cf = (DATA_UTYPE)res < (DATA_UTYPE)argL;                \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR ^ -1) & (argL ^ res),             \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
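/* Editorial note (an inference from the code above, not original
   text): CC_DEP2 arrives with the old carry already xor'd into it,
   which "argR = CC_DEP2 ^ oldC" undoes.  The thunk builder in
   guest-x86/toIR.c stores argR ^ oldC rather than argR itself,
   apparently so that the thunk's declared dependencies remain
   accurate for definedness-tracking tools such as Memcheck. */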
/*-------------------------------------------------------------*/

#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, oldC, res;                                \
     oldC = CC_NDEP & X86G_CC_MASK_C;                           \
     argL = CC_DEP1;                                            \
     argR = CC_DEP2 ^ oldC;                                     \
     res  = (argL - argR) - oldC;                               \
     if (oldC)                                                  \
        cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR;              \
     else                                                       \
        cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR;               \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = lshift((argL ^ argR) & (argL ^ res),                  \
                 12 - DATA_BITS) & X86G_CC_MASK_O;              \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE)                     \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = 0;                                                    \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0;                                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     of = 0;                                                    \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_INC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res - 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & X86G_CC_MASK_C;                             \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK) == SIGN_MASK) << 11;               \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     UInt argL, argR, res;                                      \
     res  = CC_DEP1;                                            \
     argL = res + 1;                                            \
     argR = 1;                                                  \
     cf = CC_NDEP & X86G_CC_MASK_C;                             \
     pf = parity_table[(UChar)res];                             \
     af = (res ^ argL ^ argR) & 0x10;                           \
     zf = ((DATA_UTYPE)res == 0) << 6;                          \
     sf = lshift(res, 8 - DATA_BITS) & 0x80;                    \
     of = ((res & DATA_MASK)                                    \
           == ((UInt)SIGN_MASK - 1)) << 11;                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C;        \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     cf = CC_DEP2 & 1;                                          \
     pf = parity_table[(UChar)CC_DEP1];                         \
     af = 0; /* undefined */                                    \
     zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6;                      \
     sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80;                \
     /* of is defined if shift count == 1 */                    \
     of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS)             \
          & X86G_CC_MASK_O;                                     \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* ROL: cf' = lsb(result).  of' = msb(result) ^ lsb(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt fl                                                    \
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))        \
          | (X86G_CC_MASK_C & CC_DEP1)                          \
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,                  \
                                      11-(DATA_BITS-1))         \
                     ^ lshift(CC_DEP1, 11)));                   \
     return fl;                                                 \
   }                                                            \
}
/*-------------------------------------------------------------*/

/* ROR: cf' = msb(result).  of' = msb(result) ^ msb-1(result). */
/* DEP1 = result, NDEP = old flags */
#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE)                       \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt fl                                                    \
        = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C))        \
          | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1)))       \
          | (X86G_CC_MASK_O & (lshift(CC_DEP1,                  \
                                      11-(DATA_BITS-1))         \
                     ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1)));   \
     return fl;                                                 \
   }                                                            \
}
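/* Editorial note: the odd-looking shift counts above rely on lshift
   accepting negative amounts (it then shifts right instead).  For
   example lshift(CC_DEP1, 11-(DATA_BITS-1)) lands the result's msb
   on bit 11 -- the OF position -- whether DATA_BITS is 8, 16 or 32. */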
/*-------------------------------------------------------------*/

#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE,  NARROWtoU,         \
                                DATA_U2TYPE, NARROWto2U)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_UTYPE  hi;                                            \
     DATA_UTYPE  lo                                             \
        = NARROWtoU( ((DATA_UTYPE)CC_DEP1)                      \
                     * ((DATA_UTYPE)CC_DEP2) );                 \
     DATA_U2TYPE rr                                             \
        = NARROWto2U(                                           \
             ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1))               \
             * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) );          \
     hi = NARROWtoU(rr >>/*u*/ DATA_BITS);                      \
     cf = (hi != 0);                                            \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
/*-------------------------------------------------------------*/

#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE,  NARROWtoS,         \
                                DATA_S2TYPE, NARROWto2S)        \
{                                                               \
   PREAMBLE(DATA_BITS);                                         \
   { UInt cf, pf, af, zf, sf, of;                               \
     DATA_STYPE  hi;                                            \
     DATA_STYPE  lo                                             \
        = NARROWtoS( ((DATA_S2TYPE)(DATA_STYPE)CC_DEP1)         \
                     * ((DATA_S2TYPE)(DATA_STYPE)CC_DEP2) );    \
     DATA_S2TYPE rr                                             \
        = NARROWto2S(                                           \
             ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1))               \
             * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) );          \
     hi = NARROWtoS(rr >>/*s*/ DATA_BITS);                      \
     cf = (hi != (lo >>/*s*/ (DATA_BITS-1)));                   \
     pf = parity_table[(UChar)lo];                              \
     af = 0; /* undefined */                                    \
     zf = (lo == 0) << 6;                                       \
     sf = lshift(lo, 8 - DATA_BITS) & 0x80;                     \
     of = cf << 11;                                             \
     return cf | pf | af | zf | sf | of;                        \
   }                                                            \
}
#if PROFILE_EFLAGS

static Bool initted = False;

/* C flag, fast route */
static UInt tabc_fast[X86G_CC_OP_NUMBER];
/* C flag, slow route */
static UInt tabc_slow[X86G_CC_OP_NUMBER];
/* table for calculate_cond */
static UInt tab_cond[X86G_CC_OP_NUMBER][16];
/* total entry counts for calc_all, calc_c, calc_cond. */
static UInt n_calc_all  = 0;
static UInt n_calc_c    = 0;
static UInt n_calc_cond = 0;

#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))


static void showCounts ( void )
{
   Int op, co;
   HChar ch;
   vex_printf("\nTotal calls: calc_all=%u   calc_cond=%u   calc_c=%u\n",
              n_calc_all, n_calc_cond, n_calc_c );

   vex_printf("      cSLOW  cFAST    O   NO    B   NB    Z   NZ   BE  NBE"
              "    S   NS    P   NP    L   NL   LE  NLE\n");
   vex_printf("     -----------------------------------------------------"
              "----------------------------------------\n");
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {

      ch = ' ';
      if (op > 0 && (op-1) % 3 == 0)
         ch = 'B';
      if (op > 0 && (op-1) % 3 == 1)
         ch = 'W';
      if (op > 0 && (op-1) % 3 == 2)
         ch = 'L';

      vex_printf("%2d%c: ", op, ch);
      vex_printf("%6u ", tabc_slow[op]);
      vex_printf("%6u ", tabc_fast[op]);
      for (co = 0; co < 16; co++) {
         Int n = tab_cond[op][co];
         if (n >= 1000)
            vex_printf(" %3dK", n / 1000);
         else
            vex_printf(" %3d ", n );
      }
      vex_printf("\n");
   }
   vex_printf("\n");
}

static void initCounts ( void )
{
   Int op, co;
   initted = True;
   for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
      tabc_fast[op] = tabc_slow[op] = 0;
      for (co = 0; co < 16; co++)
         tab_cond[op][co] = 0;
   }
}

#endif /* PROFILE_EFLAGS */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters.
   Worker function, not directly called from generated code. */
static
UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
                                     UInt cc_dep1_formal,
                                     UInt cc_dep2_formal,
                                     UInt cc_ndep_formal )
{
   switch (cc_op) {
      case X86G_CC_OP_COPY:
         return cc_dep1_formal
                & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                   | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);

      case X86G_CC_OP_ADDB:   ACTIONS_ADD(  8, UChar  );
      case X86G_CC_OP_ADDW:   ACTIONS_ADD( 16, UShort );
      case X86G_CC_OP_ADDL:   ACTIONS_ADD( 32, UInt   );

      case X86G_CC_OP_ADCB:   ACTIONS_ADC(  8, UChar  );
      case X86G_CC_OP_ADCW:   ACTIONS_ADC( 16, UShort );
      case X86G_CC_OP_ADCL:   ACTIONS_ADC( 32, UInt   );

      case X86G_CC_OP_SUBB:   ACTIONS_SUB(  8, UChar  );
      case X86G_CC_OP_SUBW:   ACTIONS_SUB( 16, UShort );
      case X86G_CC_OP_SUBL:   ACTIONS_SUB( 32, UInt   );

      case X86G_CC_OP_SBBB:   ACTIONS_SBB(  8, UChar  );
      case X86G_CC_OP_SBBW:   ACTIONS_SBB( 16, UShort );
      case X86G_CC_OP_SBBL:   ACTIONS_SBB( 32, UInt   );

      case X86G_CC_OP_LOGICB: ACTIONS_LOGIC(  8, UChar  );
      case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
      case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt   );

      case X86G_CC_OP_INCB:   ACTIONS_INC(  8, UChar  );
      case X86G_CC_OP_INCW:   ACTIONS_INC( 16, UShort );
      case X86G_CC_OP_INCL:   ACTIONS_INC( 32, UInt   );

      case X86G_CC_OP_DECB:   ACTIONS_DEC(  8, UChar  );
      case X86G_CC_OP_DECW:   ACTIONS_DEC( 16, UShort );
      case X86G_CC_OP_DECL:   ACTIONS_DEC( 32, UInt   );

      case X86G_CC_OP_SHLB:   ACTIONS_SHL(  8, UChar  );
      case X86G_CC_OP_SHLW:   ACTIONS_SHL( 16, UShort );
      case X86G_CC_OP_SHLL:   ACTIONS_SHL( 32, UInt   );

      case X86G_CC_OP_SHRB:   ACTIONS_SHR(  8, UChar  );
      case X86G_CC_OP_SHRW:   ACTIONS_SHR( 16, UShort );
      case X86G_CC_OP_SHRL:   ACTIONS_SHR( 32, UInt   );

      case X86G_CC_OP_ROLB:   ACTIONS_ROL(  8, UChar  );
      case X86G_CC_OP_ROLW:   ACTIONS_ROL( 16, UShort );
      case X86G_CC_OP_ROLL:   ACTIONS_ROL( 32, UInt   );

      case X86G_CC_OP_RORB:   ACTIONS_ROR(  8, UChar  );
      case X86G_CC_OP_RORW:   ACTIONS_ROR( 16, UShort );
      case X86G_CC_OP_RORL:   ACTIONS_ROR( 32, UInt   );

      case X86G_CC_OP_UMULB:  ACTIONS_UMUL(  8, UChar,  toUChar,
                                                UShort, toUShort );
      case X86G_CC_OP_UMULW:  ACTIONS_UMUL( 16, UShort, toUShort,
                                                UInt,   toUInt );
      case X86G_CC_OP_UMULL:  ACTIONS_UMUL( 32, UInt,   toUInt,
                                                ULong,  idULong );

      case X86G_CC_OP_SMULB:  ACTIONS_SMUL(  8, Char,   toUChar,
                                                Short,  toUShort );
      case X86G_CC_OP_SMULW:  ACTIONS_SMUL( 16, Short,  toUShort,
                                                Int,    toUInt );
      case X86G_CC_OP_SMULL:  ACTIONS_SMUL( 32, Int,    toUInt,
                                                Long,   idULong );

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_eflags_all_WRK(X86)"
                    "( %u, 0x%x, 0x%x, 0x%x )\n",
                    cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
         vpanic("x86g_calculate_eflags_all_WRK(X86)");
   }
}
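/* Editorial sketch (not part of the original file) of how the thunk
   machinery above behaves for "addl %ebx,%eax" with %eax=0x7FFFFFFF
   and %ebx=1: */
#if 0
static void example_eflags_thunk_use ( void )
{
   UInt flags = x86g_calculate_eflags_all_WRK( X86G_CC_OP_ADDL,
                                               0x7FFFFFFFU, /* DEP1 = argL */
                                               1U,          /* DEP2 = argR */
                                               0U );        /* NDEP unused */
   /* 0x7FFFFFFF + 1 = 0x80000000: expect OF and SF set, CF and ZF clear. */
   vassert( (flags & X86G_CC_MASK_O) && (flags & X86G_CC_MASK_S) );
   vassert( !(flags & X86G_CC_MASK_C) && !(flags & X86G_CC_MASK_Z) );
}
#endif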
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate all the 6 flags from the supplied thunk parameters. */
UInt x86g_calculate_eflags_all ( UInt cc_op,
                                 UInt cc_dep1,
                                 UInt cc_dep2,
                                 UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_all++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif
   return
      x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate just the carry flag from the supplied thunk parameters. */
UInt x86g_calculate_eflags_c ( UInt cc_op,
                               UInt cc_dep1,
                               UInt cc_dep2,
                               UInt cc_ndep )
{
#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   n_calc_c++;
   tabc_fast[cc_op]++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   /* Fast-case some common ones. */
   switch (cc_op) {
      case X86G_CC_OP_LOGICL:
      case X86G_CC_OP_LOGICW:
      case X86G_CC_OP_LOGICB:
         return 0;
      case X86G_CC_OP_SUBL:
         return ((UInt)cc_dep1) < ((UInt)cc_dep2)
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBW:
         return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_SUBB:
         return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
                   ? X86G_CC_MASK_C : 0;
      case X86G_CC_OP_INCL:
      case X86G_CC_OP_DECL:
         return cc_ndep & X86G_CC_MASK_C;
      default:
         break;
   }

#  if PROFILE_EFLAGS
   tabc_fast[cc_op]--;
   tabc_slow[cc_op]++;
#  endif

   return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
          & X86G_CC_MASK_C;
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* returns 1 or 0 */
UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
                                UInt cc_op,
                                UInt cc_dep1,
                                UInt cc_dep2,
                                UInt cc_ndep )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
                                               cc_dep2, cc_ndep);
   UInt of,sf,zf,cf,pf;
   UInt inv = cond & 1;

#  if PROFILE_EFLAGS
   if (!initted) initCounts();
   tab_cond[cc_op][cond]++;
   n_calc_cond++;
   if (SHOW_COUNTS_NOW) showCounts();
#  endif

   switch (cond) {
      case X86CondNO:
      case X86CondO: /* OF == 1 */
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ of);

      case X86CondNZ:
      case X86CondZ: /* ZF == 1 */
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ zf);

      case X86CondNB:
      case X86CondB: /* CF == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         return 1 & (inv ^ cf);

      case X86CondNBE:
      case X86CondBE: /* (CF or ZF) == 1 */
         cf = eflags >> X86G_CC_SHIFT_C;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ (cf | zf));

      case X86CondNS:
      case X86CondS: /* SF == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         return 1 & (inv ^ sf);

      case X86CondNP:
      case X86CondP: /* PF == 1 */
         pf = eflags >> X86G_CC_SHIFT_P;
         return 1 & (inv ^ pf);

      case X86CondNL:
      case X86CondL: /* (SF xor OF) == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         return 1 & (inv ^ (sf ^ of));

      case X86CondNLE:
      case X86CondLE: /* ((SF xor OF) or ZF) == 1 */
         sf = eflags >> X86G_CC_SHIFT_S;
         of = eflags >> X86G_CC_SHIFT_O;
         zf = eflags >> X86G_CC_SHIFT_Z;
         return 1 & (inv ^ ((sf ^ of) | zf));

      default:
         /* shouldn't really make these calls from generated code */
         vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
                    cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
         vpanic("x86g_calculate_condition");
   }
}
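/* Editorial example (not original text): after "cmpl $5,%eax" with
   %eax == 3, the thunk is (X86G_CC_OP_SUBL, dep1=3, dep2=5).  With
   the usual x86 condition-code encoding X86CondL = 0xC, inv =
   cond & 1 = 0, and 3 - 5 = -2 gives SF=1, OF=0, so SF^OF == 1 and
   x86g_calculate_condition(X86CondL, X86G_CC_OP_SUBL, 3, 5, 0)
   returns 1. */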
/* VISIBLE TO LIBVEX CLIENT */
UInt LibVEX_GuestX86_get_eflags ( /*IN*/const VexGuestX86State* vex_state )
{
   UInt eflags = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   UInt dflag = vex_state->guest_DFLAG;
   vassert(dflag == 1 || dflag == 0xFFFFFFFF);
   if (dflag == 0xFFFFFFFF)
      eflags |= X86G_CC_MASK_D;
   if (vex_state->guest_IDFLAG == 1)
      eflags |= X86G_CC_MASK_ID;
   if (vex_state->guest_ACFLAG == 1)
      eflags |= X86G_CC_MASK_AC;
   return eflags;
}
/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestX86_put_eflags ( UInt eflags,
                             /*MOD*/VexGuestX86State* vex_state )
{
   if (eflags & X86G_CC_MASK_D) {
      vex_state->guest_DFLAG = 0xFFFFFFFF;
      eflags &= ~X86G_CC_MASK_D;
   }
   else
      vex_state->guest_DFLAG = 1;

   if (eflags & X86G_CC_MASK_ID) {
      vex_state->guest_IDFLAG = 1;
      eflags &= ~X86G_CC_MASK_ID;
   }
   else
      vex_state->guest_IDFLAG = 0;

   if (eflags & X86G_CC_MASK_AC) {
      vex_state->guest_ACFLAG = 1;
      eflags &= ~X86G_CC_MASK_AC;
   }
   else
      vex_state->guest_ACFLAG = 0;

   UInt cc_mask = X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z |
                  X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P;
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = eflags & cc_mask;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}
/* VISIBLE TO LIBVEX CLIENT */
void
LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
                              /*MOD*/VexGuestX86State* vex_state )
{
   UInt oszacp = x86g_calculate_eflags_all_WRK(
                    vex_state->guest_CC_OP,
                    vex_state->guest_CC_DEP1,
                    vex_state->guest_CC_DEP2,
                    vex_state->guest_CC_NDEP
                 );
   if (new_carry_flag & 1) {
      oszacp |= X86G_CC_MASK_C;
   } else {
      oszacp &= ~X86G_CC_MASK_C;
   }
   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = oszacp;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
}
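/* Editorial usage sketch (not part of the original file): a LibVEX
   client flipping the guest carry flag; 'st' is the client's guest
   state block. */
#if 0
static void flip_guest_carry ( VexGuestX86State* st )
{
   UInt fl = LibVEX_GuestX86_get_eflags(st);
   LibVEX_GuestX86_put_eflag_c( (fl & X86G_CC_MASK_C) ? 0 : 1, st );
}
#endif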
/*---------------------------------------------------------------*/
/*--- %eflags translation-time function specialisers.         ---*/
/*--- These help iropt specialise calls to the above run-time ---*/
/*--- %eflags functions.                                      ---*/
/*---------------------------------------------------------------*/

/* Used by the optimiser to try specialisations.  Returns an
   equivalent expression, or NULL if none. */

static inline Bool isU32 ( IRExpr* e, UInt n )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == n );
}

IRExpr* guest_x86_spechelper ( const HChar* function_name,
                               IRExpr** args,
                               IRStmt** precedingStmts,
                               Int      n_precedingStmts )
{
#  define unop(_op,_a1)         IRExpr_Unop((_op),(_a1))
#  define binop(_op,_a1,_a2)    IRExpr_Binop((_op),(_a1),(_a2))
#  define mkU32(_n)             IRExpr_Const(IRConst_U32(_n))
#  define mkU8(_n)              IRExpr_Const(IRConst_U8(_n))
   Int i, arity = 0;
   for (i = 0; args[i]; i++)
      arity++;
#  if 0
   vex_printf("spec request:\n");
   vex_printf("   %s  ", function_name);
   for (i = 0; i < arity; i++) {
      vex_printf(" ");
      ppIRExpr(args[i]);
   }
   vex_printf("\n");
#  endif
   /* --------- specialising "x86g_calculate_condition" --------- */

   if (vex_streq(function_name, "x86g_calculate_condition")) {
      /* specialise calls to above "calculate condition" function */
      IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
      vassert(arity == 5);
      cond    = args[0];
      cc_op   = args[1];
      cc_dep1 = args[2];
      cc_dep2 = args[3];

      /*---------------- ADDL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
         /* long add, then Z --> test (dst+src == 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }
      /*---------------- SUBL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
         /* long sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
         /* long sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, cc_dep1, cc_dep2));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
         /* long sub/cmp, then L (signed less than)
            --> test dst <s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
         /* long sub/cmp, then NL (signed greater than or equal)
            --> test !(dst <s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
         /* long sub/cmp, then LE (signed less than or equal)
            --> test dst <=s src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
         /* long sub/cmp, then NLE (signed not less than or equal)
            --> test !(dst <=s src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
         /* long sub/cmp, then BE (unsigned less than or equal)
            --> test dst <=u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
         /* long sub/cmp, then NBE (unsigned greater than)
            --> test !(dst <=u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
         /* long sub/cmp, then B (unsigned less than)
            --> test dst <u src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
         /* long sub/cmp, then NB (unsigned greater than or equal)
            --> test !(dst <u src) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
         /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32S,
                           binop(Iop_Sub32, cc_dep1, cc_dep2),
                           mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
         /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
         return binop(Iop_Xor32,
                      unop(Iop_1Uto32,
                           binop(Iop_CmpLT32S,
                                 binop(Iop_Sub32, cc_dep1, cc_dep2),
                                 mkU32(0))),
                      mkU32(1));
      }
      /*---------------- SUBW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
         /* word sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
         /* word sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE16,
                           unop(Iop_32to16,cc_dep1),
                           unop(Iop_32to16,cc_dep2)));
      }
      /*---------------- SUBB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
         /* byte sub/cmp, then Z --> test dst==src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
         /* byte sub/cmp, then NZ --> test dst!=src */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE8,
                           unop(Iop_32to8,cc_dep1),
                           unop(Iop_32to8,cc_dep2)));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
         /* byte sub/cmp, then NBE (unsigned greater than)
            --> test src <u dst */
         /* Note, args are opposite way round from the usual */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep2,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep1,mkU32(0xFF))));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
          && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
                                         --> test dst <s 0
                                         --> (UInt)dst[7]
            This is yet another scheme by which gcc figures out if the
            top bit of a byte is 1 or 0.  See also LOGICB/CondS below. */
         /* Note: isU32(cc_dep2, 0) is correct, even though this is
            for an 8-bit comparison, since the args to the helper
            function are always U32s. */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
          && isU32(cc_dep2, 0)) {
         /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
                                          --> test !(dst <s 0)
                                          --> (UInt) !dst[7]
         */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                      mkU32(1));
      }
      /*---------------- LOGICL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
         /* long and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
         /* long and/or/xor, then NZ --> test dst!=0 */
         return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
         /* long and/or/xor, then LE
            This is pretty subtle.  LOGIC sets SF and ZF according to the
            result and makes OF be zero.  LE computes (SF ^ OF) | ZF, but
            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
            the result is <=signed 0.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
         /* long and/or/xor, then BE
            LOGIC sets ZF according to the result and makes CF be zero.
            BE computes (CF | ZF), but CF is zero, so this reduces to ZF
            -- which will be 1 iff the result is zero.  Hence ...
         */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* long and/or/xor, then S --> (UInt)result[31] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(31)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
         /* see comment below for (LOGICB, CondNS) */
         /* long and/or/xor, then NS --> (UInt) ~ result[31] */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(31)),
                            mkU32(1)),
                      mkU32(1));
      }
      /*---------------- LOGICW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
         /* word and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
                                        mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
         /* see comment below for (LOGICB, CondS) */
         /* word and/or/xor, then S --> (UInt)result[15] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(15)),
                      mkU32(1));
      }
      /*---------------- LOGICB ----------------*/

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
         /* byte and/or/xor, then Z --> test dst==0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
                                        mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
         /* byte and/or/xor, then NZ --> test dst!=0 */
         /* b9ac9:       84 c0          test   %al,%al
            b9acb:       75 0d          jne    b9ada */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
                                        mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
         /* this is an idiom gcc sometimes uses to find out if the top
            bit of a byte register is set: eg testb %al,%al; js ..
            Since it just depends on the top bit of the byte, extract
            that bit and explicitly get rid of all the rest.  This
            helps memcheck avoid false positives in the case where any
            of the other bits in the byte are undefined. */
         /* byte and/or/xor, then S --> (UInt)result[7] */
         return binop(Iop_And32,
                      binop(Iop_Shr32,cc_dep1,mkU8(7)),
                      mkU32(1));
      }

      if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
         /* ditto, for negation-of-S. */
         /* byte and/or/xor, then NS --> (UInt) ~ result[7] */
         return binop(Iop_Xor32,
                      binop(Iop_And32,
                            binop(Iop_Shr32,cc_dep1,mkU8(7)),
                            mkU32(1)),
                      mkU32(1));
      }
      /*---------------- DECL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
         /* dec L, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }

      if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
         /* dec L, then S --> compare DST <s 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
      }

      /*---------------- DECW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
         /* dec W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }

      /*---------------- INCW ----------------*/

      if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
         /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
         /* inc W, then Z --> test dst == 0 */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpEQ32,
                           binop(Iop_Shl32,cc_dep1,mkU8(16)),
                           mkU32(0)));
      }
      /*---------------- SHRL ----------------*/

      if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
         /* SHRL, then Z --> test dep1 == 0 */
         return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
      }
      /*---------------- COPY ----------------*/
      /* This can happen, as a result of x87 FP compares: "fcom ... ;
         fnstsw %ax ; sahf ; jbe" for example. */

      if (isU32(cc_op, X86G_CC_OP_COPY) &&
          (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
         /* COPY, then BE --> extract C and Z from dep1, and test
            (C or Z == 1). */
         /* COPY, then NBE --> extract C and Z from dep1, and test
            (C or Z == 0). */
         UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(
                        Iop_Or32,
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                        binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
                     ),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
         /* COPY, then B --> extract C from dep1, and test (C == 1). */
         /* COPY, then NB --> extract C from dep1, and test (C == 0). */
         UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
         /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
         /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
         UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      if (isU32(cc_op, X86G_CC_OP_COPY)
          && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
         /* COPY, then P --> extract P from dep1, and test (P == 1). */
         /* COPY, then NP --> extract P from dep1, and test (P == 0). */
         UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
         return
            unop(
               Iop_1Uto32,
               binop(
                  Iop_CmpEQ32,
                  binop(
                     Iop_And32,
                     binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
                     mkU32(1)
                  ),
                  mkU32(nnn)
               )
            );
      }

      return NULL;
   }
   /* --------- specialising "x86g_calculate_eflags_c" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
      /* specialise calls to above "calculate_eflags_c" function */
      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      cc_dep2 = args[2];
      cc_ndep = args[3];

      if (isU32(cc_op, X86G_CC_OP_SUBL)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
      }
      if (isU32(cc_op, X86G_CC_OP_SUBB)) {
         /* C after sub denotes unsigned less than */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_And32,cc_dep1,mkU32(0xFF)),
                           binop(Iop_And32,cc_dep2,mkU32(0xFF))));
      }
      if (isU32(cc_op, X86G_CC_OP_LOGICL)
          || isU32(cc_op, X86G_CC_OP_LOGICW)
          || isU32(cc_op, X86G_CC_OP_LOGICB)) {
         /* cflag after logic is zero */
         return mkU32(0);
      }
      if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
         return cc_ndep;
      }
      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* cflag after COPY is stored in DEP1. */
         return
            binop(
               Iop_And32,
               binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
               mkU32(1)
            );
      }
      if (isU32(cc_op, X86G_CC_OP_ADDL)) {
         /* C after add denotes sum <u either arg */
         return unop(Iop_1Uto32,
                     binop(Iop_CmpLT32U,
                           binop(Iop_Add32, cc_dep1, cc_dep2),
                           cc_dep1));
      }
      // ATC, requires verification, no test case known
      //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
      //   /* C after signed widening multiply denotes the case where
      //      the top half of the result isn't simply the sign extension
      //      of the bottom half (iow the result doesn't fit completely
      //      in the bottom half). Hence:
      //        C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
      //      where 'x' denotes signed widening multiply.*/
      //   return
      //      unop(Iop_1Uto32,
      //           binop(Iop_CmpNE32,
      //                 unop(Iop_64HIto32,
      //                      binop(Iop_MullS32, cc_dep1, cc_dep2)),
      //                 binop(Iop_Sar32,
      //                       binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
      //}

#     if 0
      if (cc_op->tag == Iex_Const) {
         vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
      }
#     endif

      return NULL;
   }
   /* --------- specialising "x86g_calculate_eflags_all" --------- */

   if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
      /* specialise calls to above "calculate_eflags_all" function */
      IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
      vassert(arity == 4);
      cc_op   = args[0];
      cc_dep1 = args[1];
      /* cc_dep2 = args[2]; */
      /* cc_ndep = args[3]; */

      if (isU32(cc_op, X86G_CC_OP_COPY)) {
         /* eflags after COPY are stored in DEP1. */
         return
            binop(
               Iop_And32,
               cc_dep1,
               mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
                     | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
            );
      }
      return NULL;
   }

#  undef unop
#  undef binop
#  undef mkU32
#  undef mkU8

   return NULL;
}
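/* Editorial example of what the specialiser achieves (not original
   text): for "cmpl %ebx,%eax ; jz ...", iropt offers up
   x86g_calculate_condition(X86CondZ, X86G_CC_OP_SUBL, eax, ebx, ..)
   and the SUBL/CondZ rule above replaces the whole helper call with
   the flag-free expression 1Uto32(CmpEQ32(eax, ebx)). */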
/*---------------------------------------------------------------*/
/*--- Supporting functions for x87 FPU activities.            ---*/
/*---------------------------------------------------------------*/

static inline Bool host_is_little_endian ( void )
{
   UInt x = 0x76543210;
   UChar* p = (UChar*)(&x);
   return toBool(*p == 0x10);
}
/* 80 and 64-bit floating point formats:

   80-bit:

    S  0       0-------0      zero
    S  0       0X------X      denormals
    S  1-7FFE  1X------X      normals (all normals have leading 1)
    S  7FFF    10------0      infinity
    S  7FFF    10X-----X      snan
    S  7FFF    11X-----X      qnan

   S is the sign bit.  For runs X----X, at least one of the Xs must be
   nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
   there is an explicitly represented leading 1, and a sign bit,
   giving 80 in total.

   64-bit avoids the confusion of an explicitly represented leading 1
   and so is simpler:

    S  0      0------0   zero
    S  0      X------X   denormals
    S  1-7FE  any        normals
    S  7FF    0------0   infinity
    S  7FF    0X-----X   snan
    S  7FF    1X-----X   qnan

   Exponent is 11 bits, fractional part is 52 bits, and there is a
   sign bit, giving 64 in total.
*/
/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
{
   Bool   mantissaIsZero;
   Int    bexp;
   UChar  sign;
   UChar* f64;

   vassert(host_is_little_endian());

   /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */

   f64  = (UChar*)(&dbl);
   sign = toUChar( (f64[7] >> 7) & 1 );

   /* First off, if the tag indicates the register was empty,
      return 1,0,sign,1 */
   if (tag == 0) {
      /* vex_printf("Empty\n"); */
      return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
                             | X86G_FC_MASK_C0;
   }

   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
   bexp &= 0x7FF;

   mantissaIsZero
      = toBool(
           (f64[6] & 0x0F) == 0
           && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
        );

   /* If both exponent and mantissa are zero, the value is zero.
      Return 1,0,sign,0. */
   if (bexp == 0 && mantissaIsZero) {
      /* vex_printf("Zero\n"); */
      return X86G_FC_MASK_C3 | 0
                             | (sign << X86G_FC_SHIFT_C1) | 0;
   }

   /* If exponent is zero but mantissa isn't, it's a denormal.
      Return 1,1,sign,0. */
   if (bexp == 0 && !mantissaIsZero) {
      /* vex_printf("Denormal\n"); */
      return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
                             | (sign << X86G_FC_SHIFT_C1) | 0;
   }

   /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
      Return 0,1,sign,1. */
   if (bexp == 0x7FF && mantissaIsZero) {
      /* vex_printf("Inf\n"); */
      return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
               | X86G_FC_MASK_C0;
   }

   /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
      Return 0,0,sign,1. */
   if (bexp == 0x7FF && !mantissaIsZero) {
      /* vex_printf("NaN\n"); */
      return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
   }

   /* Uh, ok, we give up.  It must be a normal finite number.
      Return 0,1,sign,0.
   */
   /* vex_printf("normal\n"); */
   return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
}
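/* Editorial example: a non-empty register (tag != 0) holding +1.0 has
   bexp == 0x3FF and a zero stored mantissa, so none of the special
   cases above fire and the result is the "normal finite" encoding
   C3,C2,C1,C0 = 0,1,0,0. */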
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest memory) */
ULong x86g_dirtyhelper_loadF80le ( Addr addrU )
{
   ULong f64;
   convert_f80le_to_f64le ( (UChar*)addrU, (UChar*)&f64 );
   return f64;
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest memory) */
void x86g_dirtyhelper_storeF80le ( Addr addrU, ULong f64 )
{
   convert_f64le_to_f80le( (UChar*)&f64, (UChar*)addrU );
}
/*----------------------------------------------*/
/*--- The exported fns ..                    ---*/
/*----------------------------------------------*/

/* Layout of the real x87 state. */
/* 13 June 05: Fpu_State and auxiliary constants were moved to
   guest_generic_x87.h */

/* fpucw[15:0] contains a x87 native format FPU control word.
   Extract from it the required FPROUND value and any resulting
   emulation warning, and return (warn << 32) | fpround value.
*/
ULong x86g_check_fldcw ( UInt fpucw )
{
   /* Decide on a rounding mode.  fpucw[11:10] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   UInt rmode = (fpucw >> 10) & 3;

   /* Detect any required emulation warnings. */
   VexEmNote ew = EmNote_NONE;

   if ((fpucw & 0x3F) != 0x3F) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_x87exns;
   }
   else
   if (((fpucw >> 8) & 3) != 3) {
      /* unsupported precision */
      ew = EmWarn_X86_x87precision;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}
/* Given fpround as an IRRoundingMode value, create a suitable x87
   native format FPU control word. */
UInt x86g_create_fpucw ( UInt fpround )
{
   fpround &= 3;
   return 0x037F | (fpround << 10);
}
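/* Editorial example: x86g_check_fldcw(0x037F) -- the x87 power-on
   control word -- yields rmode 0 (round to nearest) with no warning,
   since bits 11:10 are zero, all six exceptions are masked and the
   precision field is 3; x86g_create_fpucw(0) maps that straight back
   to 0x037F. */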
/* mxcsr[15:0] contains a SSE native format MXCSR value.
   Extract from it the required SSEROUND value and any resulting
   emulation warning, and return (warn << 32) | sseround value.
*/
ULong x86g_check_ldmxcsr ( UInt mxcsr )
{
   /* Decide on a rounding mode.  mxcsr[14:13] holds it. */
   /* NOTE, encoded exactly as per enum IRRoundingMode. */
   UInt rmode = (mxcsr >> 13) & 3;

   /* Detect any required emulation warnings. */
   VexEmNote ew = EmNote_NONE;

   if ((mxcsr & 0x1F80) != 0x1F80) {
      /* unmasked exceptions! */
      ew = EmWarn_X86_sseExns;
   }
   else
   if (mxcsr & (1<<15)) {
      /* FZ is set */
      ew = EmWarn_X86_fz;
   }
   else
   if (mxcsr & (1<<6)) {
      /* DAZ is set */
      ew = EmWarn_X86_daz;
   }

   return (((ULong)ew) << 32) | ((ULong)rmode);
}
/* Given sseround as an IRRoundingMode value, create a suitable SSE
   native format MXCSR value. */
UInt x86g_create_mxcsr ( UInt sseround )
{
   sseround &= 3;
   return 0x1F80 | (sseround << 13);
}
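/* Editorial example: likewise x86g_check_ldmxcsr(0x1F80) -- the SSE
   power-on MXCSR, with all exceptions masked and FZ/DAZ clear --
   gives rmode 0 and no warning, and x86g_create_mxcsr(0) returns
   exactly 0x1F80. */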
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state) */
/* Initialise the x87 FPU state as per 'finit'. */
void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
{
   Int i;
   gst->guest_FTOP = 0;
   for (i = 0; i < 8; i++) {
      gst->guest_FPTAG[i] = 0; /* empty */
      gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
   }
   gst->guest_FPROUND = (UInt)Irrm_NEAREST;
   gst->guest_FC3210  = 0;
}
/* This is used to implement both 'frstor' and 'fldenv'.  The latter
   appears to differ from the former only in that the 8 FP registers
   themselves are not transferred into the guest state. */
static
VexEmNote do_put_x87 ( Bool moveRegs,
                       /*IN*/Fpu_State* x87_state,
                       /*OUT*/VexGuestX86State* vex_state )
{
   Int        stno, preg;
   UInt       tag;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   UInt       ftop    = (x87_state->env[FP_ENV_STAT] >> 11) & 7;
   UInt       tagw    = x87_state->env[FP_ENV_TAG];
   UInt       fpucw   = x87_state->env[FP_ENV_CTRL];
   UInt       c3210   = x87_state->env[FP_ENV_STAT] & 0x4700;
   VexEmNote  ew;
   UInt       fpround;
   ULong      pair;

   /* Copy registers and tags */
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      tag = (tagw >> (2*preg)) & 3;
      if (tag == 3) {
         /* register is empty */
         /* hmm, if it's empty, does it still get written?  Probably
            safer to say it does.  If we don't, memcheck could get out
            of sync, in that it thinks all FP registers are defined by
            this helper, but in reality some have not been updated. */
         if (moveRegs)
            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
         vexTags[preg] = 0;
      } else {
         /* register is non-empty */
         if (moveRegs)
            convert_f80le_to_f64le( &x87_state->reg[10*stno],
                                    (UChar*)&vexRegs[preg] );
         vexTags[preg] = 1;
      }
   }

   /* stack pointer */
   vex_state->guest_FTOP = ftop;

   /* status word */
   vex_state->guest_FC3210 = c3210;

   /* handle the control word, setting FPROUND and detecting any
      emulation warnings. */
   pair    = x86g_check_fldcw ( (UInt)fpucw );
   fpround = (UInt)pair;
   ew      = (VexEmNote)(pair >> 32);

   vex_state->guest_FPROUND = fpround & 3;

   /* emulation warnings --> caller */
   return ew;
}
/* Create an x87 FPU state from the guest state, as close as
   we can approximate it. */
static
void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
                  /*OUT*/Fpu_State* x87_state )
{
   Int        i, stno, preg;
   UInt       tagw;
   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
   UInt       ftop    = vex_state->guest_FTOP;
   UInt       c3210   = vex_state->guest_FC3210;

   for (i = 0; i < 14; i++)
      x87_state->env[i] = 0;

   x87_state->env[1] = x87_state->env[3] = x87_state->env[5]
                       = x87_state->env[13] = 0xFFFF;
   x87_state->env[FP_ENV_STAT]
      = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
   x87_state->env[FP_ENV_CTRL]
      = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));

   /* Dump the register stack in ST order. */
   tagw = 0;
   for (stno = 0; stno < 8; stno++) {
      preg = (stno + ftop) & 7;
      if (vexTags[preg] == 0) {
         /* register is empty */
         tagw |= (3 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87_state->reg[10*stno] );
      } else {
         /* register is full. */
         tagw |= (0 << (2*preg));
         convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
                                 &x87_state->reg[10*stno] );
      }
   }
   x87_state->env[FP_ENV_TAG] = toUShort(tagw);
}
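/* Editorial note: the Fpu_State image written above is the 108-byte
   FSAVE layout -- 14 16-bit environment words (28 bytes) followed by
   8 x 10-byte registers (80 bytes), and 28 + 80 = 108.  That is the
   "at least 108 bytes" which LibVEX_GuestX86_get_x87 (below) asks its
   caller to provide. */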
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
{
   /* Somewhat roundabout, but at least it's simple. */
   Fpu_State tmp;
   UShort*   addrS = (UShort*)addr;
   UChar*    addrC = (UChar*)addr;
   U128*     xmm   = (U128*)(addr + 160);
   UInt      mxcsr;
   UShort    fp_tags;
   UInt      summary_tags;
   Int       r, stno;
   UShort    *srcS, *dstS;

   do_get_x87( gst, &tmp );
   mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );

   /* Now build the proper fxsave image from the x87 image we just
      made. */

   addrS[0]  = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */

   /* set addrS[2] in an endian-independent way */
   summary_tags = 0;
   fp_tags = tmp.env[FP_ENV_TAG];
   for (r = 0; r < 8; r++) {
      if ( ((fp_tags >> (2*r)) & 3) != 3 )
         summary_tags |= (1 << r);
   }
   addrC[4]  = toUChar(summary_tags); /* FTW: tag summary byte */
   addrC[5]  = 0; /* pad */

   addrS[3]  = 0; /* FOP: fpu opcode (bogus) */
   addrS[4]  = 0;
   addrS[5]  = 0; /* FPU IP (bogus) */
   addrS[6]  = 0; /* FPU IP's segment selector (bogus) (although we
                     could conceivably dump %CS here) */

   addrS[7]  = 0; /* Intel reserved */

   addrS[8]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[9]  = 0; /* FPU DP (operand pointer) (bogus) */
   addrS[10] = 0; /* segment selector for above operand pointer; %DS
                     perhaps? */
   addrS[11] = 0; /* Intel reserved */

   addrS[12] = toUShort(mxcsr);  /* MXCSR */
   addrS[13] = toUShort(mxcsr >> 16);

   addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
   addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */

   /* Copy in the FP registers, in ST order. */
   for (stno = 0; stno < 8; stno++) {
      srcS = (UShort*)(&tmp.reg[10*stno]);
      dstS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
      dstS[5] = 0;
      dstS[6] = 0;
      dstS[7] = 0;
   }

   /* That's the first 160 bytes of the image done.  Now only %xmm0
      .. %xmm7 remain to be copied.  If the host is big-endian, these
      need to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( xmm[0], gst->guest_XMM0 );
   COPY_U128( xmm[1], gst->guest_XMM1 );
   COPY_U128( xmm[2], gst->guest_XMM2 );
   COPY_U128( xmm[3], gst->guest_XMM3 );
   COPY_U128( xmm[4], gst->guest_XMM4 );
   COPY_U128( xmm[5], gst->guest_XMM5 );
   COPY_U128( xmm[6], gst->guest_XMM6 );
   COPY_U128( xmm[7], gst->guest_XMM7 );

#  undef COPY_U128
}
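/* Editorial note on the layout arithmetic above: the header and MXCSR
   words occupy addrS[0..15] (32 bytes), the 8 ST registers take 16
   bytes each at addrS[16 + 8*stno] (byte offsets 32..159), so the
   %xmm copies start exactly at byte offset 160 -- which is why 'xmm'
   was computed as (U128*)(addr + 160). */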
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
{
   Fpu_State tmp;
   VexEmNote warnX87 = EmNote_NONE;
   VexEmNote warnXMM = EmNote_NONE;
   UShort*   addrS   = (UShort*)addr;
   UChar*    addrC   = (UChar*)addr;
   U128*     xmm     = (U128*)(addr + 160);
   UShort    fp_tags;
   Int       r, stno, i;

   /* Restore %xmm0 .. %xmm7.  If the host is big-endian, these need
      to be byte-swapped. */
   vassert(host_is_little_endian());

#  define COPY_U128(_dst,_src)                       \
      do { _dst[0] = _src[0]; _dst[1] = _src[1];     \
           _dst[2] = _src[2]; _dst[3] = _src[3]; }   \
      while (0)

   COPY_U128( gst->guest_XMM0, xmm[0] );
   COPY_U128( gst->guest_XMM1, xmm[1] );
   COPY_U128( gst->guest_XMM2, xmm[2] );
   COPY_U128( gst->guest_XMM3, xmm[3] );
   COPY_U128( gst->guest_XMM4, xmm[4] );
   COPY_U128( gst->guest_XMM5, xmm[5] );
   COPY_U128( gst->guest_XMM6, xmm[6] );
   COPY_U128( gst->guest_XMM7, xmm[7] );

#  undef COPY_U128

   /* Copy the x87 registers out of the image, into a temporary
      Fpu_State struct. */

   /* LLVM on Darwin turns the following loop into a movaps plus a
      handful of scalar stores.  This would work fine except for the
      fact that VEX doesn't keep the stack correctly (16-) aligned for
      the call, so it segfaults.  Hence, split the loop into two
      pieces (and pray LLVM doesn't merely glue them back together) so
      it's composed only of scalar stores and so is alignment
      insensitive.  Of course this is a kludge of the lamest kind --
      VEX should be fixed properly. */
   /* Code that seems to trigger the problem:
      for (i = 0; i < 14; i++) tmp.env[i] = 0; */
   for (i = 0; i < 7; i++) tmp.env[i+0] = 0;
   __asm__ __volatile__("" ::: "memory");
   for (i = 0; i < 7; i++) tmp.env[i+7] = 0;

   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
   /* fill in tmp.reg[0..7] */
   for (stno = 0; stno < 8; stno++) {
      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
      dstS[0] = srcS[0];
      dstS[1] = srcS[1];
      dstS[2] = srcS[2];
      dstS[3] = srcS[3];
      dstS[4] = srcS[4];
   }
   /* fill in tmp.env[0..13] */
   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */

   fp_tags = 0;
   for (r = 0; r < 8; r++) {
      if (addrC[4] & (1<<r))
         fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
      else
         fp_tags |= (3 << (2*r)); /* EMPTY */
   }
   tmp.env[FP_ENV_TAG] = fp_tags;

   /* Now write 'tmp' into the guest state. */
   warnX87 = do_put_x87( True/*moveRegs*/, &tmp, gst );

   { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
                | ((((UInt)addrS[13]) & 0xFFFF) << 16);
     ULong w64 = x86g_check_ldmxcsr( w32 );

     warnXMM = (VexEmNote)(w64 >> 32);

     gst->guest_SSEROUND = w64 & 0xFFFFFFFF;
   }

   /* Prefer an X87 emwarn over an XMM one, if both exist. */
   if (warnX87 != EmNote_NONE)
      return warnX87;
   else
      return warnXMM;
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
{
   do_get_x87( gst, (Fpu_State*)addr );
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
{
   return do_put_x87( True/*regs too*/, (Fpu_State*)addr, gst );
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
{
   /* Somewhat roundabout, but at least it's simple. */
   Int       i;
   UShort*   addrP = (UShort*)addr;
   Fpu_State tmp;

   do_get_x87( gst, &tmp );
   for (i = 0; i < 14; i++)
      addrP[i] = tmp.env[i];
}

/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (writes guest state, reads guest mem) */
VexEmNote x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
{
   return do_put_x87( False/*don't move regs*/, (Fpu_State*)addr, gst );
}
/* VISIBLE TO LIBVEX CLIENT */
/* Do x87 save from the supplied VexGuestX86State structure and store the
   result at the given address which represents a buffer of at least 108
   bytes. */
void LibVEX_GuestX86_get_x87 ( /*IN*/VexGuestX86State* vex_state,
                               /*OUT*/UChar* x87_state )
{
   do_get_x87 ( vex_state, (Fpu_State*)x87_state );
}

/* VISIBLE TO LIBVEX CLIENT */
/* Do x87 restore from the supplied address and store read values to the given
   VexGuestX86State structure. */
VexEmNote LibVEX_GuestX86_put_x87 ( /*IN*/UChar* x87_state,
                                    /*MOD*/VexGuestX86State* vex_state )
{
   return do_put_x87 ( True/*moveRegs*/, (Fpu_State*)x87_state, vex_state );
}

/* VISIBLE TO LIBVEX CLIENT */
/* Return mxcsr from the supplied VexGuestX86State structure. */
UInt LibVEX_GuestX86_get_mxcsr ( /*IN*/VexGuestX86State* vex_state )
{
   return x86g_create_mxcsr ( vex_state->guest_SSEROUND );
}

/* VISIBLE TO LIBVEX CLIENT */
/* Modify the given VexGuestX86State structure according to the passed mxcsr
   value. */
VexEmNote LibVEX_GuestX86_put_mxcsr ( /*IN*/UInt mxcsr,
                                      /*MOD*/VexGuestX86State* vex_state )
{
   ULong w64 = x86g_check_ldmxcsr( mxcsr );
   vex_state->guest_SSEROUND = w64 & 0xFFFFFFFF;
   return (VexEmNote)(w64 >> 32);
}
/*---------------------------------------------------------------*/
/*--- Misc integer helpers, including rotates and CPUID.      ---*/
/*---------------------------------------------------------------*/

/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for rotate right
   through the carry bit.  Result in low 32 bits,
   new flags (OSZACP) in high 32 bits.
*/
ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;

   switch (sz) {
      case 4:
         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of        = ((arg >> 31) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = (arg >> 1) | (cf << 31);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 2:
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of        = ((arg >> 15) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7FFF) | (cf << 15);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      case 1:
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf        = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         of        = ((arg >> 7) ^ cf) & 1;
         while (tempCOUNT > 0) {
            tempcf = arg & 1;
            arg    = ((arg >> 1) & 0x7F) | (cf << 7);
            cf     = tempcf;
            tempCOUNT--;
         }
         break;
      default:
         vpanic("calculate_RCR: invalid size");
   }

   cf &= 1;
   of &= 1;
   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);

   return (((ULong)eflags_in) << 32) | ((ULong)arg);
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for rotate left
   through the carry bit.  Result in low 32 bits,
   new flags (OSZACP) in high 32 bits.
*/
ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
{
   UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;

   switch (sz) {
      case 4:
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 31) & 1;
            arg    = (arg << 1) | (cf & 1);
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 31) ^ cf) & 1;
         break;
      case 2:
         while (tempCOUNT >= 17) tempCOUNT -= 17;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 15) & 1;
            arg    = 0xFFFF & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 15) ^ cf) & 1;
         break;
      case 1:
         while (tempCOUNT >= 9) tempCOUNT -= 9;
         cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
         while (tempCOUNT > 0) {
            tempcf = (arg >> 7) & 1;
            arg    = 0xFF & ((arg << 1) | (cf & 1));
            cf     = tempcf;
            tempCOUNT--;
         }
         of = ((arg >> 7) ^ cf) & 1;
         break;
      default:
         vpanic("calculate_RCL: invalid size");
   }

   cf &= 1;
   of &= 1;
   eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
   eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);

   return (((ULong)eflags_in) << 32) | ((ULong)arg);
}
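/* Editorial example: x86g_calculate_RCR(0x01, 1, 1/*CF=1, since
   X86G_CC_SHIFT_C is 0*/, 1/*byte*/) rotates the old carry into bit 7
   and the old bit 0 out into CF: the result byte is 0x80, and the
   returned flags word has C set (bit 0 moved into CF) and O set (old
   msb 0 xor old CF 1). */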
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Calculate both flags and value result for DAA/DAS/AAA/AAS.
   AX value in low half of arg, OSZACP in upper half.
   See guest-x86/toIR.c usage point for details.
*/
static UInt calc_parity_8bit ( UInt w32 ) {
   UInt i;
   UInt p = 1;
   for (i = 0; i < 8; i++)
      p ^= (1 & (w32 >> i));
   return p;
}

UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
{
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   UInt result = 0;

   switch (opcode) {
      case 0x27: { /* DAA */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_C  = old_C;
            if (r_AL >= 0x100) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL + 0x60;
            r_C  = 1;
         } else {
            r_C = 0;
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x2F: { /* DAS */
         UInt old_AL = r_AL;
         UInt old_C  = r_C;
         r_C = 0;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            Bool borrow = r_AL < 6;
            r_AL = r_AL - 6;
            r_C  = old_C;
            if (borrow) r_C = 1;
            r_A = 1;
         } else {
            r_A = 0;
         }
         if (old_AL > 0x99 || old_C == 1) {
            r_AL = r_AL - 0x60;
            r_C  = 1;
         } else {
            /* Intel docs are wrong: r_C = 0; */
         }
         /* O is undefined.  S Z and P are set according to the
            result. */
         r_AL &= 0xFF;
         r_O = 0; /* let's say */
         r_S = (r_AL & 0x80) ? 1 : 0;
         r_Z = (r_AL == 0) ? 1 : 0;
         r_P = calc_parity_8bit( r_AL );
         break;
      }
      case 0x37: { /* AAA */
         Bool nudge = r_AL > 0xF9;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL + 6;
            r_AH = r_AH + 1 + (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
      case 0x3F: { /* AAS */
         Bool nudge = r_AL < 0x06;
         if ((r_AL & 0xF) > 9 || r_A == 1) {
            r_AL = r_AL - 6;
            r_AH = r_AH - 1 - (nudge ? 1 : 0);
            r_A  = 1;
            r_C  = 1;
            r_AL = r_AL & 0xF;
         } else {
            r_A  = 0;
            r_C  = 0;
            r_AL = r_AL & 0xF;
         }
         /* O S Z and P are undefined. */
         r_O = r_S = r_Z = r_P = 0; /* let's say */
         break;
      }
   }

   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
            | ( (r_AH & 0xFF) << 8 )
            | ( (r_AL & 0xFF) << 0 );
   return result;
}
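
/* Illustrative sketch (not compiled): the BCD helpers take AX in the
   low 16 bits of their argument and the OSZACP flags shifted left by
   16, and return the updated values in the same layout.  The function
   name is hypothetical. */
#if 0
static void daa_pack_example ( void )
{
   /* DAA on AL=0x0F with AF clear: the low nibble exceeds 9, so 6 is
      added, giving AL=0x15 with AF set. */
   UInt ax     = 0x000F;   /* AH=0x00, AL=0x0F */
   UInt flags  = 0;        /* all of OSZACP clear */
   UInt packed = (flags << 16) | ax;
   UInt res    = x86g_calculate_daa_das_aaa_aas( packed, 0x27/*DAA*/ );
   vassert((res & 0xFF) == 0x15);
   vassert(res & (1u << (16 + X86G_CC_SHIFT_A)));
}
#endif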
UInt x86g_calculate_aad_aam ( UInt flags_and_AX, UInt opcode )
{
   UInt r_AL = (flags_and_AX >> 0) & 0xFF;
   UInt r_AH = (flags_and_AX >> 8) & 0xFF;
   UInt r_O  = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
   UInt r_S  = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
   UInt r_Z  = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
   UInt r_A  = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
   UInt r_C  = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
   UInt r_P  = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
   UInt result = 0;

   switch (opcode) {
      case 0xD4: { /* AAM */
         r_AH = r_AL / 10;
         r_AL = r_AL % 10;
         break;
      }
      case 0xD5: { /* AAD */
         r_AL = ((r_AH * 10) + r_AL) & 0xff;
         r_AH = 0;
         break;
      }
   }

   r_O = 0; /* let's say (undefined) */
   r_C = 0; /* let's say (undefined) */
   r_A = 0; /* let's say (undefined) */
   r_S = (r_AL & 0x80) ? 1 : 0;
   r_Z = (r_AL == 0) ? 1 : 0;
   r_P = calc_parity_8bit( r_AL );

   result =   ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
            | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
            | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
            | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
            | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
            | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
            | ( (r_AH & 0xFF) << 8 )
            | ( (r_AL & 0xFF) << 0 );
   return result;
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, return 1. */
ULong x86g_dirtyhelper_RDTSC ( void )
{
#  if defined(__i386__)
   ULong res;
   __asm__ __volatile__("rdtsc" : "=A" (res));
   return res;
#  else
   return 1ULL;
#  endif
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be a P55C (Intel Pentium/MMX) */
void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         st->guest_EAX = 0x1;
         st->guest_EBX = 0x756e6547;
         st->guest_ECX = 0x6c65746e;
         st->guest_EDX = 0x49656e69;
         break;
      default:
         st->guest_EAX = 0x543;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x8001bf;
         break;
   }
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be an Athlon "Classic" (Model 2, K75 "Pluto/Orion") */
/* But without 3DNow support (weird, but we really don't support it). */
void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      /* vendor ID */
      case 0:
         st->guest_EAX = 0x1;
         st->guest_EBX = 0x68747541;
         st->guest_ECX = 0x444d4163;
         st->guest_EDX = 0x69746e65;
         break;
      /* feature bits */
      case 1:
         st->guest_EAX = 0x621;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x183f9ff;
         break;
      /* Highest Extended Function Supported (0x80000004 brand string) */
      case 0x80000000:
         st->guest_EAX = 0x80000004;
         st->guest_EBX = 0x68747541;
         st->guest_ECX = 0x444d4163;
         st->guest_EDX = 0x69746e65;
         break;
      /* Extended Processor Info and Feature Bits */
      case 0x80000001:
         st->guest_EAX = 0x721;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
         break;
      /* Processor Brand String "AMD Athlon(tm) Processor" */
      case 0x80000002:
         st->guest_EAX = 0x20444d41;
         st->guest_EBX = 0x6c687441;
         st->guest_ECX = 0x74286e6f;
         st->guest_EDX = 0x5020296d;
         break;
      case 0x80000003:
         st->guest_EAX = 0x65636f72;
         st->guest_EBX = 0x726f7373;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x0;
         break;
      default:
         st->guest_EAX = 0x0;
         st->guest_EBX = 0x0;
         st->guest_ECX = 0x0;
         st->guest_EDX = 0x0;
         break;
   }
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be the following SSE1-capable CPU:
   vendor_id  : GenuineIntel
   model name : Intel(R) Pentium(R) III CPU family      1133MHz
*/
void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         st->guest_EAX = 0x00000002;
         st->guest_EBX = 0x756e6547;
         st->guest_ECX = 0x6c65746e;
         st->guest_EDX = 0x49656e69;
         break;
      case 1:
         st->guest_EAX = 0x000006b1;
         st->guest_EBX = 0x00000004;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0383fbff;
         break;
      default:
         st->guest_EAX = 0x03020101;
         st->guest_EBX = 0x00000000;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0c040883;
         break;
   }
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be the following SSE2-capable CPU:
   vendor_id       : GenuineIntel
   model name      : Intel(R) Pentium(R) 4 CPU 3.00GHz
   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr
                     pge mca cmov pat pse36 clflush dts acpi mmx fxsr
                     sse sse2 ss ht tm pbe
   cache_alignment : 128
   address sizes   : 36 bits physical, 32 bits virtual
*/
void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
{
   switch (st->guest_EAX) {
      case 0:
         st->guest_EAX = 0x00000002;
         st->guest_EBX = 0x756e6547;
         st->guest_ECX = 0x6c65746e;
         st->guest_EDX = 0x49656e69;
         break;
      case 1:
         st->guest_EAX = 0x00000f29;
         st->guest_EBX = 0x01020809;
         st->guest_ECX = 0x00004400;
         st->guest_EDX = 0xbfebfbff;
         break;
      default:
         st->guest_EAX = 0x03020101;
         st->guest_EBX = 0x00000000;
         st->guest_ECX = 0x00000000;
         st->guest_EDX = 0x0c040883;
         break;
   }
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (modifies guest state) */
/* Claim to be the following SSSE3-capable CPU (2 x ...):
   vendor_id       : GenuineIntel
   model name      : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
   cache size      : 4096 KB
   flags           : fpu vme de pse tsc msr pae mce cx8 apic sep
                     mtrr pge mca cmov pat pse36 clflush dts acpi
                     mmx fxsr sse sse2 ss ht tm syscall nx lm
                     constant_tsc pni monitor ds_cpl vmx est tm2
   cache_alignment : 64
   address sizes   : 36 bits physical, 48 bits virtual
*/
void x86g_dirtyhelper_CPUID_sse3 ( VexGuestX86State* st )
{
#  define SET_ABCD(_a,_b,_c,_d)            \
      do { st->guest_EAX = (UInt)(_a);     \
           st->guest_EBX = (UInt)(_b);     \
           st->guest_ECX = (UInt)(_c);     \
           st->guest_EDX = (UInt)(_d);     \
      } while (0)

   switch (st->guest_EAX) {
      case 0x00000000:
         SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
         break;
      case 0x00000001:
         SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
         break;
      case 0x00000002:
         SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
         break;
      case 0x00000003:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000004: {
         switch (st->guest_ECX) {
            case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
                                      0x0000003f, 0x00000001); break;
            case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
                                      0x00000fff, 0x00000001); break;
            default:         SET_ABCD(0x00000000, 0x00000000,
                                      0x00000000, 0x00000000); break;
         }
         break;
      }
      case 0x00000005:
         SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
         break;
      case 0x00000006:
         SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
         break;
      case 0x00000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000008:
         SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x00000009:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x0000000a:
      unhandled_eax_value:
         SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000000:
         SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000001:
         SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
         break;
      case 0x80000002:
         SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
         break;
      case 0x80000003:
         SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
         break;
      case 0x80000004:
         SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
         break;
      case 0x80000005:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000006:
         SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
         break;
      case 0x80000007:
         SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
         break;
      case 0x80000008:
         SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
         break;
      default:
         goto unhandled_eax_value;
   }
#  undef SET_ABCD
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, return 0. */
UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
{
#  if defined(__i386__)
   UInt r = 0;
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
                              : "=a" (r) : "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
                              : "=a" (r) : "Nd" (portno));
         break;
      default:
         break;
   }
   return r;
#  else
   return 0;
#  endif
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, do nothing. */
void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
{
#  if defined(__i386__)
   portno &= 0xFFFF;
   switch (sz) {
      case 4:
         __asm__ __volatile__("outl %0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 2:
         __asm__ __volatile__("outw %w0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      case 1:
         __asm__ __volatile__("outb %b0, %w1"
                              : : "a" (data), "Nd" (portno));
         break;
      default:
         break;
   }
#  else
   /* do nothing */
#  endif
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-x86 platforms, do nothing. */
/* op = 0: call the native SGDT instruction.
   op = 1: call the native SIDT instruction.
*/
void x86g_dirtyhelper_SxDT ( void* address, UInt op ) {
#  if defined(__i386__)
   switch (op) {
      case 0:
         __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
         break;
      case 1:
         __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
         break;
      default:
         vpanic("x86g_dirtyhelper_SxDT");
   }
#  else
   /* do nothing */
   UChar* p = (UChar*)address;
   p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
#  endif
}
/*---------------------------------------------------------------*/
/*--- Helpers for MMX/SSE/SSE2.                               ---*/
/*---------------------------------------------------------------*/

static inline UChar abdU8 ( UChar xx, UChar yy ) {
   return toUChar(xx>yy ? xx-yy : yy-xx);
}

static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
   return (((ULong)w1) << 32) | ((ULong)w0);
}

static inline UShort sel16x4_3 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(hi32 >> 16);
}
static inline UShort sel16x4_2 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(hi32);
}
static inline UShort sel16x4_1 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUShort(lo32 >> 16);
}
static inline UShort sel16x4_0 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUShort(lo32);
}

static inline UChar sel8x8_7 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 24);
}
static inline UChar sel8x8_6 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 16);
}
static inline UChar sel8x8_5 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 8);
}
static inline UChar sel8x8_4 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(hi32 >> 0);
}
static inline UChar sel8x8_3 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 24);
}
static inline UChar sel8x8_2 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 16);
}
static inline UChar sel8x8_1 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 8);
}
static inline UChar sel8x8_0 ( ULong w64 ) {
   UInt lo32 = toUInt(w64);
   return toUChar(lo32 >> 0);
}
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
{
   return
      mk32x2(
         (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
            + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
         (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
            + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
      );
}
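
/* Illustrative sketch (not compiled): pmaddwd multiplies the four
   signed 16-bit lanes pairwise and sums adjacent products into two
   32-bit lanes.  The function name is hypothetical. */
#if 0
static void pmaddwd_example ( void )
{
   /* Lanes, high to low: xx = {1,2,3,4}, yy = {10,20,30,40}.
      High result = 1*10 + 2*20 = 50; low result = 3*30 + 4*40 = 250. */
   ULong xx = mk32x2( (1u << 16) | 2u,   (3u << 16) | 4u   );
   ULong yy = mk32x2( (10u << 16) | 20u, (30u << 16) | 40u );
   ULong r  = x86g_calculate_mmx_pmaddwd( xx, yy );
   vassert( (UInt)(r >> 32) == 50 && (UInt)r == 250 );
}
#endif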
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
{
   UInt t = 0;
   t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
   t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
   t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
   t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
   t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
   t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
   t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
   t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
   t &= 0xFFFF;
   return (ULong)t;
}
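
/* Illustrative sketch (not compiled): psadbw sums the eight absolute
   byte differences into the low 16 bits of the result.  The function
   name is hypothetical. */
#if 0
static void psadbw_example ( void )
{
   /* Every byte differs by 3, so the sum is 8 * 3 = 24. */
   ULong xx = 0x1010101010101010ULL;
   ULong yy = 0x1313131313131313ULL;
   vassert( x86g_calculate_mmx_psadbw(xx, yy) == 24 );
}
#endif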
/*---------------------------------------------------------------*/
/*--- Helpers for dealing with segment overrides.             ---*/
/*---------------------------------------------------------------*/

static inline
UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
{
   UInt lo  = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
   UInt mid =   0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
   UInt hi  =   0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
   return (hi << 24) | (mid << 16) | lo;
}

static inline
UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
{
   UInt lo    = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
   UInt hi    =    0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
   UInt limit = (hi << 16) | lo;
   if (ent->LdtEnt.Bits.Granularity)
      limit = (limit << 12) | 0xFFF;
   return limit;
}
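
/* Illustrative sketch (not compiled): how the Granularity bit scales
   the 20-bit raw limit.  Only the fields read by get_segdescr_limit
   are initialised; the function name is hypothetical. */
#if 0
static void segdescr_limit_example ( void )
{
   VexGuestX86SegDescr d;
   d.LdtEnt.Bits.LimitLow    = 0x0FFF; /* low 16 bits of raw limit */
   d.LdtEnt.Bits.LimitHi     = 0x0;    /* high 4 bits of raw limit */
   d.LdtEnt.Bits.Granularity = 1;      /* limit counts 4K pages */
   /* (0xFFF << 12) | 0xFFF == 0x00FFFFFF, i.e. a 16MB segment. */
   vassert(get_segdescr_limit(&d) == 0x00FFFFFF);
}
#endif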
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
                              UInt seg_selector, UInt virtual_addr )
{
   UInt tiBit, base, limit;
   VexGuestX86SegDescr* the_descrs;

   Bool verboze = False;

   /* If this isn't true, we're in Big Trouble. */
   vassert(8 == sizeof(VexGuestX86SegDescr));

   if (verboze)
      vex_printf("x86h_use_seg_selector: "
                 "seg_selector = 0x%x, vaddr = 0x%x\n",
                 seg_selector, virtual_addr);

   /* Check for wildly invalid selector. */
   if (seg_selector & ~0xFFFF)
      goto bad;

   seg_selector &= 0x0000FFFF;

   /* Sanity check the segment selector.  Ensure that RPL=11b (least
      privilege).  This forms the bottom 2 bits of the selector. */
   if ((seg_selector & 3) != 3)
      goto bad;

   /* Extract the TI bit (0 means GDT, 1 means LDT) */
   tiBit = (seg_selector >> 2) & 1;

   /* Convert the segment selector onto a table index */
   seg_selector >>= 3;
   vassert(seg_selector >= 0 && seg_selector < 8192);

   if (tiBit == 0) {

      /* GDT access. */
      /* Do we actually have a GDT to look at? */
      if (gdt == 0)
         goto bad;

      /* Check for access to non-existent entry. */
      if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
         goto bad;

      the_descrs = (VexGuestX86SegDescr*)gdt;
      base  = get_segdescr_base (&the_descrs[seg_selector]);
      limit = get_segdescr_limit(&the_descrs[seg_selector]);

   } else {

      /* All the same stuff, except for the LDT. */
      if (ldt == 0)
         goto bad;

      if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
         goto bad;

      the_descrs = (VexGuestX86SegDescr*)ldt;
      base  = get_segdescr_base (&the_descrs[seg_selector]);
      limit = get_segdescr_limit(&the_descrs[seg_selector]);

   }

   /* Do the limit check.  Note, this check is just slightly too
      slack.  Really it should be "if (virtual_addr + size - 1 >=
      limit)," but we don't have the size info to hand.  Getting it
      could be significantly complex. */
   if (virtual_addr >= limit)
      goto bad;

   if (verboze)
      vex_printf("x86h_use_seg_selector: "
                 "base = 0x%x, addr = 0x%x\n",
                 base, base + virtual_addr);

   /* High 32 bits are zero, indicating success. */
   return (ULong)( ((UInt)virtual_addr) + base );

  bad:
   return 1ULL << 32;
}
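
/* Illustrative sketch (not compiled): a nonzero high half of the
   result signals translation failure, otherwise the low half is the
   linear address.  Selector 0x000B is GDT entry 1 with RPL=3; the
   function name is hypothetical. */
#if 0
static void seg_selector_check_example ( HWord ldt, HWord gdt )
{
   ULong r = x86g_use_seg_selector( ldt, gdt, 0x000B, 0x1000 );
   if ((r >> 32) != 0) {
      vex_printf("selector translation failed\n");
   } else {
      vex_printf("linear address = 0x%x\n", (UInt)r);
   }
}
#endif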
/*---------------------------------------------------------------*/
/*--- Helpers for dealing with, and describing,               ---*/
/*--- guest state as a whole.                                 ---*/
/*---------------------------------------------------------------*/

/* Initialise the entire x86 guest state. */
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
{
   vex_state->host_EvC_FAILADDR = 0;
   vex_state->host_EvC_COUNTER  = 0;

   vex_state->guest_EAX = 0;
   vex_state->guest_ECX = 0;
   vex_state->guest_EDX = 0;
   vex_state->guest_EBX = 0;
   vex_state->guest_ESP = 0;
   vex_state->guest_EBP = 0;
   vex_state->guest_ESI = 0;
   vex_state->guest_EDI = 0;

   vex_state->guest_CC_OP   = X86G_CC_OP_COPY;
   vex_state->guest_CC_DEP1 = 0;
   vex_state->guest_CC_DEP2 = 0;
   vex_state->guest_CC_NDEP = 0;
   vex_state->guest_DFLAG   = 1; /* forwards */
   vex_state->guest_IDFLAG  = 0;
   vex_state->guest_ACFLAG  = 0;

   vex_state->guest_EIP = 0;

   /* Initialise the simulated FPU */
   x86g_dirtyhelper_FINIT( vex_state );

   /* Initialise the SSE state. */
#  define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;

   vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
   SSEZERO(vex_state->guest_XMM0);
   SSEZERO(vex_state->guest_XMM1);
   SSEZERO(vex_state->guest_XMM2);
   SSEZERO(vex_state->guest_XMM3);
   SSEZERO(vex_state->guest_XMM4);
   SSEZERO(vex_state->guest_XMM5);
   SSEZERO(vex_state->guest_XMM6);
   SSEZERO(vex_state->guest_XMM7);

#  undef SSEZERO

   vex_state->guest_CS  = 0;
   vex_state->guest_DS  = 0;
   vex_state->guest_ES  = 0;
   vex_state->guest_FS  = 0;
   vex_state->guest_GS  = 0;
   vex_state->guest_SS  = 0;
   vex_state->guest_LDT = 0;
   vex_state->guest_GDT = 0;

   vex_state->guest_EMNOTE = EmNote_NONE;

   /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
   vex_state->guest_CMSTART = 0;
   vex_state->guest_CMLEN   = 0;

   vex_state->guest_NRADDR   = 0;
   vex_state->guest_SC_CLASS = 0;
   vex_state->guest_IP_AT_SYSCALL = 0;

   vex_state->padding1 = 0;
   vex_state->padding2 = 0;
   vex_state->padding3 = 0;
}
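
/* Illustrative sketch (not compiled): typical client-side setup.
   Zero the whole state, then plant an entry point and stack pointer
   before handing the state to VEX.  The two addresses and the
   function name are hypothetical. */
#if 0
static void guest_state_setup_example ( void )
{
   VexGuestX86State gst;
   LibVEX_GuestX86_initialise(&gst);
   gst.guest_EIP = 0x8048000;  /* hypothetical text start */
   gst.guest_ESP = 0xBFFF0000; /* hypothetical initial stack top */
}
#endif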
/* Figure out if any part of the guest state contained in minoff
   .. maxoff requires precise memory exceptions.  If in doubt return
   True (but this generates significantly slower code).

   By default we enforce precise exns for guest %ESP, %EBP and %EIP
   only.  These are the minimum needed to extract correct stack
   backtraces from x86 code.

   Only %ESP is needed in mode VexRegUpdSpAtMemAccess.
*/
Bool guest_x86_state_requires_precise_mem_exns (
        Int minoff, Int maxoff, VexRegisterUpdates pxControl
     )
{
   Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
   Int ebp_max = ebp_min + 4 - 1;
   Int esp_min = offsetof(VexGuestX86State, guest_ESP);
   Int esp_max = esp_min + 4 - 1;
   Int eip_min = offsetof(VexGuestX86State, guest_EIP);
   Int eip_max = eip_min + 4 - 1;

   if (maxoff < esp_min || minoff > esp_max) {
      /* no overlap with esp */
      if (pxControl == VexRegUpdSpAtMemAccess)
         return False; // We only need to check stack pointer.
   } else {
      return True;
   }

   if (maxoff < ebp_min || minoff > ebp_max) {
      /* no overlap with ebp */
   } else {
      return True;
   }

   if (maxoff < eip_min || minoff > eip_max) {
      /* no overlap with eip */
   } else {
      return True;
   }

   return False;
}
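
/* Illustrative sketch (not compiled): a write covering only guest_EAX
   never needs precise exceptions, while one overlapping guest_ESP
   always does.  The function name is hypothetical. */
#if 0
static void precise_exns_query_example ( void )
{
   Int eax = offsetof(VexGuestX86State, guest_EAX);
   Int esp = offsetof(VexGuestX86State, guest_ESP);
   vassert( !guest_x86_state_requires_precise_mem_exns(
               eax, eax+3, VexRegUpdUnwindregsAtMemAccess) );
   vassert(  guest_x86_state_requires_precise_mem_exns(
               esp, esp+3, VexRegUpdUnwindregsAtMemAccess) );
}
#endif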
#define ALWAYSDEFD(field)                          \
    { offsetof(VexGuestX86State, field),           \
      (sizeof ((VexGuestX86State*)0)->field) }

VexGuestLayout
   x86guest_layout
      = {
          /* Total size of the guest state, in bytes. */
          .total_sizeB = sizeof(VexGuestX86State),

          /* Describe the stack pointer. */
          .offset_SP = offsetof(VexGuestX86State,guest_ESP),
          .sizeof_SP = 4,

          /* Describe the frame pointer. */
          .offset_FP = offsetof(VexGuestX86State,guest_EBP),
          .sizeof_FP = 4,

          /* Describe the instruction pointer. */
          .offset_IP = offsetof(VexGuestX86State,guest_EIP),
          .sizeof_IP = 4,

          /* Describe any sections to be regarded by Memcheck as
             'always-defined'. */
          .n_alwaysDefd = 24,

          /* flags thunk: OP and NDEP are always defd, whereas DEP1
             and DEP2 have to be tracked.  See detailed comment in
             gdefs.h on meaning of thunk fields. */
          .alwaysDefd
             = { /*  0 */ ALWAYSDEFD(guest_CC_OP),
                 /*  1 */ ALWAYSDEFD(guest_CC_NDEP),
                 /*  2 */ ALWAYSDEFD(guest_DFLAG),
                 /*  3 */ ALWAYSDEFD(guest_IDFLAG),
                 /*  4 */ ALWAYSDEFD(guest_ACFLAG),
                 /*  5 */ ALWAYSDEFD(guest_EIP),
                 /*  6 */ ALWAYSDEFD(guest_FTOP),
                 /*  7 */ ALWAYSDEFD(guest_FPTAG),
                 /*  8 */ ALWAYSDEFD(guest_FPROUND),
                 /*  9 */ ALWAYSDEFD(guest_FC3210),
                 /* 10 */ ALWAYSDEFD(guest_CS),
                 /* 11 */ ALWAYSDEFD(guest_DS),
                 /* 12 */ ALWAYSDEFD(guest_ES),
                 /* 13 */ ALWAYSDEFD(guest_FS),
                 /* 14 */ ALWAYSDEFD(guest_GS),
                 /* 15 */ ALWAYSDEFD(guest_SS),
                 /* 16 */ ALWAYSDEFD(guest_LDT),
                 /* 17 */ ALWAYSDEFD(guest_GDT),
                 /* 18 */ ALWAYSDEFD(guest_EMNOTE),
                 /* 19 */ ALWAYSDEFD(guest_SSEROUND),
                 /* 20 */ ALWAYSDEFD(guest_CMSTART),
                 /* 21 */ ALWAYSDEFD(guest_CMLEN),
                 /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
                 /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
               }
        };
/*---------------------------------------------------------------*/
/*--- end                                 guest_x86_helpers.c ---*/
/*---------------------------------------------------------------*/