2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_x86_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2004-2017 OpenWorks LLP
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 /* Translates x86 code to IR. */
38 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
39 to ensure a 32-bit value is being written.
41 FUCOMI(P): what happens to A and S flags? Currently are forced
46 * all arithmetic done at 64 bits
48 * no FP exceptions, except for handling stack over/underflow
50 * FP rounding mode observed only for float->int conversions
51 and int->float conversions which could lose accuracy, and
52 for float-to-float rounding. For all other operations,
53 round-to-nearest is used, regardless.
55 * some of the FCOM cases could do with testing -- not convinced
56 that the args are the right way round.
58 * FSAVE does not re-initialise the FPU; it should do
60 * FINIT not only initialises the FPU environment, it also
61 zeroes all the FP registers. It should leave the registers
64 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
65 per Intel docs this bit has no meaning anyway. Since PUSHF is the
66 only way to observe eflags[1], a proper fix would be to make that
69 The state of %eflags.AC (alignment check, bit 18) is recorded by
70 the simulation (viz, if you set it with popf then a pushf produces
71 the value you set it to), but it is otherwise ignored. In
72 particular, setting it to 1 does NOT cause alignment checking to
73 happen. Programs that set it to 1 and then rely on the resulting
74 SIGBUSs to inform them of misaligned accesses will not work.
76 Implementation of sysenter is necessarily partial. sysenter is a
77 kind of system call entry. When doing a sysenter, the return
78 address is not known -- that is something that is beyond Vex's
79 knowledge. So the generated IR forces a return to the scheduler,
80 which can do what it likes to simulate the systenter, but it MUST
81 set this thread's guest_EIP field with the continuation address
82 before resuming execution. If that doesn't happen, the thread will
83 jump to address zero, which is probably fatal.
85 This module uses global variables and so is not MT-safe (if that
86 should ever become relevant).
88 The delta values are 32-bit ints, not 64-bit ints. That means
89 this module may not work right if run on a 64-bit host. That should
90 be fixed properly, really -- if anyone ever wants to use Vex to
91 translate x86 code for execution on a 64-bit host.
93 casLE (implementation of lock-prefixed insns) and rep-prefixed
94 insns: the side-exit back to the start of the insn is done with
95 Ijk_Boring. This is quite wrong, it should be done with
96 Ijk_NoRedir, since otherwise the side exit, which is intended to
97 restart the instruction for whatever reason, could go somewhere
98 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
99 no-redir jumps performance critical, at least for rep-prefixed
100 instructions, since all iterations thereof would involve such a
101 jump. It's not such a big deal with casLE since the side exit is
102 only taken if the CAS fails, that is, the location is contended,
103 which is relatively unlikely.
105 XXXX: Nov 2009: handling of SWP on ARM suffers from the same
108 Note also, the test for CAS success vs failure is done using
109 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
110 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
111 shouldn't definedness-check these comparisons. See
112 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
113 background/rationale.
116 /* Performance holes:
118 - fcom ; fstsw %ax ; sahf
119 sahf does not update the O flag (sigh) and so O needs to
120 be computed. This is done expensively; it would be better
121 to have a calculate_eflags_o helper.
123 - emwarns; some FP codes can generate huge numbers of these
124 if the fpucw is changed in an inner loop. It would be
125 better for the guest state to have an emwarn-enable reg
126 which can be set zero or nonzero. If it is zero, emwarns
127 are not flagged, and instead control just flows all the
128 way through bbs as usual.
131 /* "Special" instructions.
133 This instruction decoder can decode three special instructions
134 which mean nothing natively (are no-ops as far as regs/mem are
135 concerned) but have meaning for supporting Valgrind. A special
136 instruction is flagged by the 12-byte preamble C1C703 C1C70D C1C71D
137 C1C713 (in the standard interpretation, that means: roll $3, %edi;
138 roll $13, %edi; roll $29, %edi; roll $19, %edi). Following that,
139 one of the following 3 are allowed (standard interpretation in
142 87DB (xchgl %ebx,%ebx) %EDX = client_request ( %EAX )
143 87C9 (xchgl %ecx,%ecx) %EAX = guest_NRADDR
144 87D2 (xchgl %edx,%edx) call-noredir *%EAX
145 87FF (xchgl %edi,%edi) IR injection
147 Any other bytes following the 12-byte preamble are illegal and
148 constitute a failure in instruction decoding. This all assumes
149 that the preamble will never occur except in specific code
150 fragments designed for Valgrind to catch.
152 No prefixes may precede a "Special" instruction.
155 /* LOCK prefixed instructions. These are translated using IR-level
156 CAS statements (IRCAS) and are believed to preserve atomicity, even
157 from the point of view of some other process racing against a
158 simulated one (presumably they communicate via a shared memory
161 Handlers which are aware of LOCK prefixes are:
162 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
163 dis_cmpxchg_G_E (cmpxchg)
164 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
168 dis_Grp8_Imm (bts, btc, btr)
169 dis_bt_G_E (bts, btc, btr)
174 #include "libvex_basictypes.h"
175 #include "libvex_ir.h"
177 #include "libvex_guest_x86.h"
179 #include "main_util.h"
180 #include "main_globals.h"
181 #include "guest_generic_bb_to_IR.h"
182 #include "guest_generic_x87.h"
183 #include "guest_x86_defs.h"
186 /*------------------------------------------------------------*/
188 /*------------------------------------------------------------*/
190 /* These are set at the start of the translation of an insn, right
191 down in disInstr_X86, so that we don't have to pass them around
192 endlessly. They are all constant during the translation of any
195 /* We need to know this to do sub-register accesses correctly. */
196 static VexEndness host_endness
;
198 /* Pointer to the guest code area (points to start of BB, not to the
199 insn being processed). */
200 static const UChar
* guest_code
;
202 /* The guest address corresponding to guest_code[0]. */
203 static Addr32 guest_EIP_bbstart
;
205 /* The guest address for the instruction currently being
207 static Addr32 guest_EIP_curr_instr
;
209 /* The IRSB* into which we're generating code. */
213 /*------------------------------------------------------------*/
214 /*--- Debugging output ---*/
215 /*------------------------------------------------------------*/
217 #define DIP(format, args...) \
218 if (vex_traceflags & VEX_TRACE_FE) \
219 vex_printf(format, ## args)
221 #define DIS(buf, format, args...) \
222 if (vex_traceflags & VEX_TRACE_FE) \
223 vex_sprintf(buf, format, ## args)
226 /*------------------------------------------------------------*/
227 /*--- Offsets of various parts of the x86 guest state. ---*/
228 /*------------------------------------------------------------*/
230 #define OFFB_EAX offsetof(VexGuestX86State,guest_EAX)
231 #define OFFB_EBX offsetof(VexGuestX86State,guest_EBX)
232 #define OFFB_ECX offsetof(VexGuestX86State,guest_ECX)
233 #define OFFB_EDX offsetof(VexGuestX86State,guest_EDX)
234 #define OFFB_ESP offsetof(VexGuestX86State,guest_ESP)
235 #define OFFB_EBP offsetof(VexGuestX86State,guest_EBP)
236 #define OFFB_ESI offsetof(VexGuestX86State,guest_ESI)
237 #define OFFB_EDI offsetof(VexGuestX86State,guest_EDI)
239 #define OFFB_EIP offsetof(VexGuestX86State,guest_EIP)
241 #define OFFB_CC_OP offsetof(VexGuestX86State,guest_CC_OP)
242 #define OFFB_CC_DEP1 offsetof(VexGuestX86State,guest_CC_DEP1)
243 #define OFFB_CC_DEP2 offsetof(VexGuestX86State,guest_CC_DEP2)
244 #define OFFB_CC_NDEP offsetof(VexGuestX86State,guest_CC_NDEP)
246 #define OFFB_FPREGS offsetof(VexGuestX86State,guest_FPREG[0])
247 #define OFFB_FPTAGS offsetof(VexGuestX86State,guest_FPTAG[0])
248 #define OFFB_DFLAG offsetof(VexGuestX86State,guest_DFLAG)
249 #define OFFB_IDFLAG offsetof(VexGuestX86State,guest_IDFLAG)
250 #define OFFB_ACFLAG offsetof(VexGuestX86State,guest_ACFLAG)
251 #define OFFB_FTOP offsetof(VexGuestX86State,guest_FTOP)
252 #define OFFB_FC3210 offsetof(VexGuestX86State,guest_FC3210)
253 #define OFFB_FPROUND offsetof(VexGuestX86State,guest_FPROUND)
255 #define OFFB_CS offsetof(VexGuestX86State,guest_CS)
256 #define OFFB_DS offsetof(VexGuestX86State,guest_DS)
257 #define OFFB_ES offsetof(VexGuestX86State,guest_ES)
258 #define OFFB_FS offsetof(VexGuestX86State,guest_FS)
259 #define OFFB_GS offsetof(VexGuestX86State,guest_GS)
260 #define OFFB_SS offsetof(VexGuestX86State,guest_SS)
261 #define OFFB_LDT offsetof(VexGuestX86State,guest_LDT)
262 #define OFFB_GDT offsetof(VexGuestX86State,guest_GDT)
264 #define OFFB_SSEROUND offsetof(VexGuestX86State,guest_SSEROUND)
265 #define OFFB_XMM0 offsetof(VexGuestX86State,guest_XMM0)
266 #define OFFB_XMM1 offsetof(VexGuestX86State,guest_XMM1)
267 #define OFFB_XMM2 offsetof(VexGuestX86State,guest_XMM2)
268 #define OFFB_XMM3 offsetof(VexGuestX86State,guest_XMM3)
269 #define OFFB_XMM4 offsetof(VexGuestX86State,guest_XMM4)
270 #define OFFB_XMM5 offsetof(VexGuestX86State,guest_XMM5)
271 #define OFFB_XMM6 offsetof(VexGuestX86State,guest_XMM6)
272 #define OFFB_XMM7 offsetof(VexGuestX86State,guest_XMM7)
274 #define OFFB_EMNOTE offsetof(VexGuestX86State,guest_EMNOTE)
276 #define OFFB_CMSTART offsetof(VexGuestX86State,guest_CMSTART)
277 #define OFFB_CMLEN offsetof(VexGuestX86State,guest_CMLEN)
278 #define OFFB_NRADDR offsetof(VexGuestX86State,guest_NRADDR)
280 #define OFFB_IP_AT_SYSCALL offsetof(VexGuestX86State,guest_IP_AT_SYSCALL)
283 /*------------------------------------------------------------*/
284 /*--- Helper bits and pieces for deconstructing the ---*/
285 /*--- x86 insn stream. ---*/
286 /*------------------------------------------------------------*/
288 /* This is the Intel register encoding -- integer regs. */
298 #define R_AL (0+R_EAX)
299 #define R_AH (4+R_EAX)
301 /* This is the Intel register encoding -- segment regs. */
310 /* Add a statement to the list held by "irbb". */
311 static void stmt ( IRStmt
* st
)
313 addStmtToIRSB( irsb
, st
);
316 /* Generate a new temporary of the given type. */
317 static IRTemp
newTemp ( IRType ty
)
319 vassert(isPlausibleIRType(ty
));
320 return newIRTemp( irsb
->tyenv
, ty
);
323 /* Various simple conversions */
325 static UInt
extend_s_8to32( UInt x
)
327 return (UInt
)((Int
)(x
<< 24) >> 24);
330 static UInt
extend_s_16to32 ( UInt x
)
332 return (UInt
)((Int
)(x
<< 16) >> 16);
335 /* Fetch a byte from the guest insn stream. */
336 static UChar
getIByte ( Int delta
)
338 return guest_code
[delta
];
341 /* Extract the reg field from a modRM byte. */
342 static Int
gregOfRM ( UChar mod_reg_rm
)
344 return (Int
)( (mod_reg_rm
>> 3) & 7 );
347 /* Figure out whether the mod and rm parts of a modRM byte refer to a
348 register or memory. If so, the byte will have the form 11XXXYYY,
349 where YYY is the register number. */
350 static Bool
epartIsReg ( UChar mod_reg_rm
)
352 return toBool(0xC0 == (mod_reg_rm
& 0xC0));
355 /* ... and extract the register number ... */
356 static Int
eregOfRM ( UChar mod_reg_rm
)
358 return (Int
)(mod_reg_rm
& 0x7);
361 /* Get a 8/16/32-bit unsigned value out of the insn stream. */
363 static UChar
getUChar ( Int delta
)
365 UChar v
= guest_code
[delta
+0];
369 static UInt
getUDisp16 ( Int delta
)
371 UInt v
= guest_code
[delta
+1]; v
<<= 8;
372 v
|= guest_code
[delta
+0];
376 static UInt
getUDisp32 ( Int delta
)
378 UInt v
= guest_code
[delta
+3]; v
<<= 8;
379 v
|= guest_code
[delta
+2]; v
<<= 8;
380 v
|= guest_code
[delta
+1]; v
<<= 8;
381 v
|= guest_code
[delta
+0];
385 static UInt
getUDisp ( Int size
, Int delta
)
388 case 4: return getUDisp32(delta
);
389 case 2: return getUDisp16(delta
);
390 case 1: return (UInt
)getUChar(delta
);
391 default: vpanic("getUDisp(x86)");
393 return 0; /*notreached*/
397 /* Get a byte value out of the insn stream and sign-extend to 32
399 static UInt
getSDisp8 ( Int delta
)
401 return extend_s_8to32( (UInt
) (guest_code
[delta
]) );
404 static UInt
getSDisp16 ( Int delta0
)
406 const UChar
* eip
= &guest_code
[delta0
];
408 d
|= ((*eip
++) << 8);
409 return extend_s_16to32(d
);
412 static UInt
getSDisp ( Int size
, Int delta
)
415 case 4: return getUDisp32(delta
);
416 case 2: return getSDisp16(delta
);
417 case 1: return getSDisp8(delta
);
418 default: vpanic("getSDisp(x86)");
420 return 0; /*notreached*/
424 /*------------------------------------------------------------*/
425 /*--- Helpers for constructing IR. ---*/
426 /*------------------------------------------------------------*/
428 /* Create a 1/2/4 byte read of an x86 integer registers. For 16/8 bit
429 register references, we need to take the host endianness into
430 account. Supplied value is 0 .. 7 and in the Intel instruction
433 static IRType
szToITy ( Int n
)
436 case 1: return Ity_I8
;
437 case 2: return Ity_I16
;
438 case 4: return Ity_I32
;
439 default: vpanic("szToITy(x86)");
443 /* On a little-endian host, less significant bits of the guest
444 registers are at lower addresses. Therefore, if a reference to a
445 register low half has the safe guest state offset as a reference to
448 static Int
integerGuestRegOffset ( Int sz
, UInt archreg
)
450 vassert(archreg
< 8);
452 /* Correct for little-endian host only. */
453 vassert(host_endness
== VexEndnessLE
);
455 if (sz
== 4 || sz
== 2 || (sz
== 1 && archreg
< 4)) {
457 case R_EAX
: return OFFB_EAX
;
458 case R_EBX
: return OFFB_EBX
;
459 case R_ECX
: return OFFB_ECX
;
460 case R_EDX
: return OFFB_EDX
;
461 case R_ESI
: return OFFB_ESI
;
462 case R_EDI
: return OFFB_EDI
;
463 case R_ESP
: return OFFB_ESP
;
464 case R_EBP
: return OFFB_EBP
;
465 default: vpanic("integerGuestRegOffset(x86,le)(4,2)");
469 vassert(archreg
>= 4 && archreg
< 8 && sz
== 1);
471 case R_EAX
: return 1+ OFFB_EAX
;
472 case R_EBX
: return 1+ OFFB_EBX
;
473 case R_ECX
: return 1+ OFFB_ECX
;
474 case R_EDX
: return 1+ OFFB_EDX
;
475 default: vpanic("integerGuestRegOffset(x86,le)(1h)");
479 vpanic("integerGuestRegOffset(x86,le)");
482 static Int
segmentGuestRegOffset ( UInt sreg
)
485 case R_ES
: return OFFB_ES
;
486 case R_CS
: return OFFB_CS
;
487 case R_SS
: return OFFB_SS
;
488 case R_DS
: return OFFB_DS
;
489 case R_FS
: return OFFB_FS
;
490 case R_GS
: return OFFB_GS
;
491 default: vpanic("segmentGuestRegOffset(x86)");
495 static Int
xmmGuestRegOffset ( UInt xmmreg
)
498 case 0: return OFFB_XMM0
;
499 case 1: return OFFB_XMM1
;
500 case 2: return OFFB_XMM2
;
501 case 3: return OFFB_XMM3
;
502 case 4: return OFFB_XMM4
;
503 case 5: return OFFB_XMM5
;
504 case 6: return OFFB_XMM6
;
505 case 7: return OFFB_XMM7
;
506 default: vpanic("xmmGuestRegOffset");
510 /* Lanes of vector registers are always numbered from zero being the
511 least significant lane (rightmost in the register). */
513 static Int
xmmGuestRegLane16offset ( UInt xmmreg
, Int laneno
)
515 /* Correct for little-endian host only. */
516 vassert(host_endness
== VexEndnessLE
);
517 vassert(laneno
>= 0 && laneno
< 8);
518 return xmmGuestRegOffset( xmmreg
) + 2 * laneno
;
521 static Int
xmmGuestRegLane32offset ( UInt xmmreg
, Int laneno
)
523 /* Correct for little-endian host only. */
524 vassert(host_endness
== VexEndnessLE
);
525 vassert(laneno
>= 0 && laneno
< 4);
526 return xmmGuestRegOffset( xmmreg
) + 4 * laneno
;
529 static Int
xmmGuestRegLane64offset ( UInt xmmreg
, Int laneno
)
531 /* Correct for little-endian host only. */
532 vassert(host_endness
== VexEndnessLE
);
533 vassert(laneno
>= 0 && laneno
< 2);
534 return xmmGuestRegOffset( xmmreg
) + 8 * laneno
;
537 static IRExpr
* getIReg ( Int sz
, UInt archreg
)
539 vassert(sz
== 1 || sz
== 2 || sz
== 4);
540 vassert(archreg
< 8);
541 return IRExpr_Get( integerGuestRegOffset(sz
,archreg
),
545 /* Ditto, but write to a reg instead. */
546 static void putIReg ( Int sz
, UInt archreg
, IRExpr
* e
)
548 IRType ty
= typeOfIRExpr(irsb
->tyenv
, e
);
550 case 1: vassert(ty
== Ity_I8
); break;
551 case 2: vassert(ty
== Ity_I16
); break;
552 case 4: vassert(ty
== Ity_I32
); break;
553 default: vpanic("putIReg(x86)");
555 vassert(archreg
< 8);
556 stmt( IRStmt_Put(integerGuestRegOffset(sz
,archreg
), e
) );
559 static IRExpr
* getSReg ( UInt sreg
)
561 return IRExpr_Get( segmentGuestRegOffset(sreg
), Ity_I16
);
564 static void putSReg ( UInt sreg
, IRExpr
* e
)
566 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I16
);
567 stmt( IRStmt_Put( segmentGuestRegOffset(sreg
), e
) );
570 static IRExpr
* getXMMReg ( UInt xmmreg
)
572 return IRExpr_Get( xmmGuestRegOffset(xmmreg
), Ity_V128
);
575 static IRExpr
* getXMMRegLane64 ( UInt xmmreg
, Int laneno
)
577 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg
,laneno
), Ity_I64
);
580 static IRExpr
* getXMMRegLane64F ( UInt xmmreg
, Int laneno
)
582 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg
,laneno
), Ity_F64
);
585 static IRExpr
* getXMMRegLane32 ( UInt xmmreg
, Int laneno
)
587 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg
,laneno
), Ity_I32
);
590 static IRExpr
* getXMMRegLane32F ( UInt xmmreg
, Int laneno
)
592 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg
,laneno
), Ity_F32
);
595 static void putXMMReg ( UInt xmmreg
, IRExpr
* e
)
597 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_V128
);
598 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg
), e
) );
601 static void putXMMRegLane64 ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
603 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I64
);
604 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg
,laneno
), e
) );
607 static void putXMMRegLane64F ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
609 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_F64
);
610 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg
,laneno
), e
) );
613 static void putXMMRegLane32F ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
615 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_F32
);
616 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg
,laneno
), e
) );
619 static void putXMMRegLane32 ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
621 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I32
);
622 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg
,laneno
), e
) );
625 static void putXMMRegLane16 ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
627 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I16
);
628 stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg
,laneno
), e
) );
631 static void assign ( IRTemp dst
, IRExpr
* e
)
633 stmt( IRStmt_WrTmp(dst
, e
) );
636 static void storeLE ( IRExpr
* addr
, IRExpr
* data
)
638 stmt( IRStmt_Store(Iend_LE
, addr
, data
) );
641 static IRExpr
* unop ( IROp op
, IRExpr
* a
)
643 return IRExpr_Unop(op
, a
);
646 static IRExpr
* binop ( IROp op
, IRExpr
* a1
, IRExpr
* a2
)
648 return IRExpr_Binop(op
, a1
, a2
);
651 static IRExpr
* triop ( IROp op
, IRExpr
* a1
, IRExpr
* a2
, IRExpr
* a3
)
653 return IRExpr_Triop(op
, a1
, a2
, a3
);
656 static IRExpr
* mkexpr ( IRTemp tmp
)
658 return IRExpr_RdTmp(tmp
);
661 static IRExpr
* mkU8 ( UInt i
)
664 return IRExpr_Const(IRConst_U8( (UChar
)i
));
667 static IRExpr
* mkU16 ( UInt i
)
670 return IRExpr_Const(IRConst_U16( (UShort
)i
));
673 static IRExpr
* mkU32 ( UInt i
)
675 return IRExpr_Const(IRConst_U32(i
));
678 static IRExpr
* mkU64 ( ULong i
)
680 return IRExpr_Const(IRConst_U64(i
));
683 static IRExpr
* mkU ( IRType ty
, UInt i
)
685 if (ty
== Ity_I8
) return mkU8(i
);
686 if (ty
== Ity_I16
) return mkU16(i
);
687 if (ty
== Ity_I32
) return mkU32(i
);
688 /* If this panics, it usually means you passed a size (1,2,4)
689 value as the IRType, rather than a real IRType. */
693 static IRExpr
* mkV128 ( UShort mask
)
695 return IRExpr_Const(IRConst_V128(mask
));
698 static IRExpr
* loadLE ( IRType ty
, IRExpr
* addr
)
700 return IRExpr_Load(Iend_LE
, ty
, addr
);
703 static IROp
mkSizedOp ( IRType ty
, IROp op8
)
706 vassert(ty
== Ity_I8
|| ty
== Ity_I16
|| ty
== Ity_I32
);
707 vassert(op8
== Iop_Add8
|| op8
== Iop_Sub8
709 || op8
== Iop_Or8
|| op8
== Iop_And8
|| op8
== Iop_Xor8
710 || op8
== Iop_Shl8
|| op8
== Iop_Shr8
|| op8
== Iop_Sar8
711 || op8
== Iop_CmpEQ8
|| op8
== Iop_CmpNE8
712 || op8
== Iop_CasCmpNE8
713 || op8
== Iop_ExpCmpNE8
715 adj
= ty
==Ity_I8
? 0 : (ty
==Ity_I16
? 1 : 2);
719 static IROp
mkWidenOp ( Int szSmall
, Int szBig
, Bool signd
)
721 if (szSmall
== 1 && szBig
== 4) {
722 return signd
? Iop_8Sto32
: Iop_8Uto32
;
724 if (szSmall
== 1 && szBig
== 2) {
725 return signd
? Iop_8Sto16
: Iop_8Uto16
;
727 if (szSmall
== 2 && szBig
== 4) {
728 return signd
? Iop_16Sto32
: Iop_16Uto32
;
730 vpanic("mkWidenOp(x86,guest)");
733 static IRExpr
* mkAnd1 ( IRExpr
* x
, IRExpr
* y
)
735 vassert(typeOfIRExpr(irsb
->tyenv
,x
) == Ity_I1
);
736 vassert(typeOfIRExpr(irsb
->tyenv
,y
) == Ity_I1
);
737 return unop(Iop_32to1
,
740 unop(Iop_1Uto32
,y
)));
743 /* Generate a compare-and-swap operation, operating on memory at
744 'addr'. The expected value is 'expVal' and the new value is
745 'newVal'. If the operation fails, then transfer control (with a
746 no-redir jump (XXX no -- see comment at top of this file)) to
747 'restart_point', which is presumably the address of the guest
748 instruction again -- retrying, essentially. */
749 static void casLE ( IRExpr
* addr
, IRExpr
* expVal
, IRExpr
* newVal
,
750 Addr32 restart_point
)
753 IRType tyE
= typeOfIRExpr(irsb
->tyenv
, expVal
);
754 IRType tyN
= typeOfIRExpr(irsb
->tyenv
, newVal
);
755 IRTemp oldTmp
= newTemp(tyE
);
756 IRTemp expTmp
= newTemp(tyE
);
758 vassert(tyE
== Ity_I32
|| tyE
== Ity_I16
|| tyE
== Ity_I8
);
759 assign(expTmp
, expVal
);
760 cas
= mkIRCAS( IRTemp_INVALID
, oldTmp
, Iend_LE
, addr
,
761 NULL
, mkexpr(expTmp
), NULL
, newVal
);
762 stmt( IRStmt_CAS(cas
) );
764 binop( mkSizedOp(tyE
,Iop_CasCmpNE8
),
765 mkexpr(oldTmp
), mkexpr(expTmp
) ),
766 Ijk_Boring
, /*Ijk_NoRedir*/
767 IRConst_U32( restart_point
),
773 /*------------------------------------------------------------*/
774 /*--- Helpers for %eflags. ---*/
775 /*------------------------------------------------------------*/
777 /* -------------- Evaluating the flags-thunk. -------------- */
779 /* Build IR to calculate all the eflags from stored
780 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
782 static IRExpr
* mk_x86g_calculate_eflags_all ( void )
785 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP
, Ity_I32
),
786 IRExpr_Get(OFFB_CC_DEP1
, Ity_I32
),
787 IRExpr_Get(OFFB_CC_DEP2
, Ity_I32
),
788 IRExpr_Get(OFFB_CC_NDEP
, Ity_I32
) );
793 "x86g_calculate_eflags_all", &x86g_calculate_eflags_all
,
796 /* Exclude OP and NDEP from definedness checking. We're only
797 interested in DEP1 and DEP2. */
798 call
->Iex
.CCall
.cee
->mcx_mask
= (1<<0) | (1<<3);
802 /* Build IR to calculate some particular condition from stored
803 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
805 static IRExpr
* mk_x86g_calculate_condition ( X86Condcode cond
)
808 = mkIRExprVec_5( mkU32(cond
),
809 IRExpr_Get(OFFB_CC_OP
, Ity_I32
),
810 IRExpr_Get(OFFB_CC_DEP1
, Ity_I32
),
811 IRExpr_Get(OFFB_CC_DEP2
, Ity_I32
),
812 IRExpr_Get(OFFB_CC_NDEP
, Ity_I32
) );
817 "x86g_calculate_condition", &x86g_calculate_condition
,
820 /* Exclude the requested condition, OP and NDEP from definedness
821 checking. We're only interested in DEP1 and DEP2. */
822 call
->Iex
.CCall
.cee
->mcx_mask
= (1<<0) | (1<<1) | (1<<4);
823 return unop(Iop_32to1
, call
);
826 /* Build IR to calculate just the carry flag from stored
827 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I32. */
828 static IRExpr
* mk_x86g_calculate_eflags_c ( void )
831 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP
, Ity_I32
),
832 IRExpr_Get(OFFB_CC_DEP1
, Ity_I32
),
833 IRExpr_Get(OFFB_CC_DEP2
, Ity_I32
),
834 IRExpr_Get(OFFB_CC_NDEP
, Ity_I32
) );
839 "x86g_calculate_eflags_c", &x86g_calculate_eflags_c
,
842 /* Exclude OP and NDEP from definedness checking. We're only
843 interested in DEP1 and DEP2. */
844 call
->Iex
.CCall
.cee
->mcx_mask
= (1<<0) | (1<<3);
849 /* -------------- Building the flags-thunk. -------------- */
851 /* The machinery in this section builds the flag-thunk following a
852 flag-setting operation. Hence the various setFlags_* functions.
855 static Bool
isAddSub ( IROp op8
)
857 return toBool(op8
== Iop_Add8
|| op8
== Iop_Sub8
);
860 static Bool
isLogic ( IROp op8
)
862 return toBool(op8
== Iop_And8
|| op8
== Iop_Or8
|| op8
== Iop_Xor8
);
865 /* U-widen 8/16/32 bit int expr to 32. */
866 static IRExpr
* widenUto32 ( IRExpr
* e
)
868 switch (typeOfIRExpr(irsb
->tyenv
,e
)) {
869 case Ity_I32
: return e
;
870 case Ity_I16
: return unop(Iop_16Uto32
,e
);
871 case Ity_I8
: return unop(Iop_8Uto32
,e
);
872 default: vpanic("widenUto32");
876 /* S-widen 8/16/32 bit int expr to 32. */
877 static IRExpr
* widenSto32 ( IRExpr
* e
)
879 switch (typeOfIRExpr(irsb
->tyenv
,e
)) {
880 case Ity_I32
: return e
;
881 case Ity_I16
: return unop(Iop_16Sto32
,e
);
882 case Ity_I8
: return unop(Iop_8Sto32
,e
);
883 default: vpanic("widenSto32");
887 /* Narrow 8/16/32 bit int expr to 8/16/32. Clearly only some
888 of these combinations make sense. */
889 static IRExpr
* narrowTo ( IRType dst_ty
, IRExpr
* e
)
891 IRType src_ty
= typeOfIRExpr(irsb
->tyenv
,e
);
892 if (src_ty
== dst_ty
)
894 if (src_ty
== Ity_I32
&& dst_ty
== Ity_I16
)
895 return unop(Iop_32to16
, e
);
896 if (src_ty
== Ity_I32
&& dst_ty
== Ity_I8
)
897 return unop(Iop_32to8
, e
);
899 vex_printf("\nsrc, dst tys are: ");
904 vpanic("narrowTo(x86)");
908 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
909 auto-sized up to the real op. */
912 void setFlags_DEP1_DEP2 ( IROp op8
, IRTemp dep1
, IRTemp dep2
, IRType ty
)
914 Int ccOp
= ty
==Ity_I8
? 0 : (ty
==Ity_I16
? 1 : 2);
916 vassert(ty
== Ity_I8
|| ty
== Ity_I16
|| ty
== Ity_I32
);
919 case Iop_Add8
: ccOp
+= X86G_CC_OP_ADDB
; break;
920 case Iop_Sub8
: ccOp
+= X86G_CC_OP_SUBB
; break;
921 default: ppIROp(op8
);
922 vpanic("setFlags_DEP1_DEP2(x86)");
924 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(ccOp
)) );
925 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto32(mkexpr(dep1
))) );
926 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto32(mkexpr(dep2
))) );
927 /* Set NDEP even though it isn't used. This makes redundant-PUT
928 elimination of previous stores to this field work better. */
929 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
933 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
936 void setFlags_DEP1 ( IROp op8
, IRTemp dep1
, IRType ty
)
938 Int ccOp
= ty
==Ity_I8
? 0 : (ty
==Ity_I16
? 1 : 2);
940 vassert(ty
== Ity_I8
|| ty
== Ity_I16
|| ty
== Ity_I32
);
945 case Iop_Xor8
: ccOp
+= X86G_CC_OP_LOGICB
; break;
946 default: ppIROp(op8
);
947 vpanic("setFlags_DEP1(x86)");
949 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(ccOp
)) );
950 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto32(mkexpr(dep1
))) );
951 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0)) );
952 /* Set NDEP even though it isn't used. This makes redundant-PUT
953 elimination of previous stores to this field work better. */
954 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
958 /* For shift operations, we put in the result and the undershifted
959 result. Except if the shift amount is zero, the thunk is left
962 static void setFlags_DEP1_DEP2_shift ( IROp op32
,
968 Int ccOp
= ty
==Ity_I8
? 2 : (ty
==Ity_I16
? 1 : 0);
970 vassert(ty
== Ity_I8
|| ty
== Ity_I16
|| ty
== Ity_I32
);
973 /* Both kinds of right shifts are handled by the same thunk
977 case Iop_Sar32
: ccOp
= X86G_CC_OP_SHRL
- ccOp
; break;
978 case Iop_Shl32
: ccOp
= X86G_CC_OP_SHLL
- ccOp
; break;
979 default: ppIROp(op32
);
980 vpanic("setFlags_DEP1_DEP2_shift(x86)");
983 /* guard :: Ity_I8. We need to convert it to I1. */
984 IRTemp guardB
= newTemp(Ity_I1
);
985 assign( guardB
, binop(Iop_CmpNE8
, mkexpr(guard
), mkU8(0)) );
987 /* DEP1 contains the result, DEP2 contains the undershifted value. */
988 stmt( IRStmt_Put( OFFB_CC_OP
,
989 IRExpr_ITE( mkexpr(guardB
),
991 IRExpr_Get(OFFB_CC_OP
,Ity_I32
) ) ));
992 stmt( IRStmt_Put( OFFB_CC_DEP1
,
993 IRExpr_ITE( mkexpr(guardB
),
994 widenUto32(mkexpr(res
)),
995 IRExpr_Get(OFFB_CC_DEP1
,Ity_I32
) ) ));
996 stmt( IRStmt_Put( OFFB_CC_DEP2
,
997 IRExpr_ITE( mkexpr(guardB
),
998 widenUto32(mkexpr(resUS
)),
999 IRExpr_Get(OFFB_CC_DEP2
,Ity_I32
) ) ));
1000 /* Set NDEP even though it isn't used. This makes redundant-PUT
1001 elimination of previous stores to this field work better. */
1002 stmt( IRStmt_Put( OFFB_CC_NDEP
,
1003 IRExpr_ITE( mkexpr(guardB
),
1005 IRExpr_Get(OFFB_CC_NDEP
,Ity_I32
) ) ));
1009 /* For the inc/dec case, we store in DEP1 the result value and in NDEP
1010 the former value of the carry flag, which unfortunately we have to
1013 static void setFlags_INC_DEC ( Bool inc
, IRTemp res
, IRType ty
)
1015 Int ccOp
= inc
? X86G_CC_OP_INCB
: X86G_CC_OP_DECB
;
1017 ccOp
+= ty
==Ity_I8
? 0 : (ty
==Ity_I16
? 1 : 2);
1018 vassert(ty
== Ity_I8
|| ty
== Ity_I16
|| ty
== Ity_I32
);
1020 /* This has to come first, because calculating the C flag
1021 may require reading all four thunk fields. */
1022 stmt( IRStmt_Put( OFFB_CC_NDEP
, mk_x86g_calculate_eflags_c()) );
1023 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(ccOp
)) );
1024 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto32(mkexpr(res
))) );
1025 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0)) );
1029 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
1033 void setFlags_MUL ( IRType ty
, IRTemp arg1
, IRTemp arg2
, UInt base_op
)
1037 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(base_op
+0) ) );
1040 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(base_op
+1) ) );
1043 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(base_op
+2) ) );
1046 vpanic("setFlags_MUL(x86)");
1048 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto32(mkexpr(arg1
)) ));
1049 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto32(mkexpr(arg2
)) ));
1050 /* Set NDEP even though it isn't used. This makes redundant-PUT
1051 elimination of previous stores to this field work better. */
1052 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
1056 /* -------------- Condition codes. -------------- */
1058 /* Condition codes, using the Intel encoding. */
1060 static const HChar
* name_X86Condcode ( X86Condcode cond
)
1063 case X86CondO
: return "o";
1064 case X86CondNO
: return "no";
1065 case X86CondB
: return "b";
1066 case X86CondNB
: return "nb";
1067 case X86CondZ
: return "z";
1068 case X86CondNZ
: return "nz";
1069 case X86CondBE
: return "be";
1070 case X86CondNBE
: return "nbe";
1071 case X86CondS
: return "s";
1072 case X86CondNS
: return "ns";
1073 case X86CondP
: return "p";
1074 case X86CondNP
: return "np";
1075 case X86CondL
: return "l";
1076 case X86CondNL
: return "nl";
1077 case X86CondLE
: return "le";
1078 case X86CondNLE
: return "nle";
1079 case X86CondAlways
: return "ALWAYS";
1080 default: vpanic("name_X86Condcode");
1085 X86Condcode
positiveIse_X86Condcode ( X86Condcode cond
,
1088 vassert(cond
>= X86CondO
&& cond
<= X86CondNLE
);
1093 *needInvert
= False
;
1099 /* -------------- Helpers for ADD/SUB with carry. -------------- */
1101 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
1104 Optionally, generate a store for the 'tres' value. This can either
1105 be a normal store, or it can be a cas-with-possible-failure style
1108 if taddr is IRTemp_INVALID, then no store is generated.
1110 if taddr is not IRTemp_INVALID, then a store (using taddr as
1111 the address) is generated:
1113 if texpVal is IRTemp_INVALID then a normal store is
1114 generated, and restart_point must be zero (it is irrelevant).
1116 if texpVal is not IRTemp_INVALID then a cas-style store is
1117 generated. texpVal is the expected value, restart_point
1118 is the restart point if the store fails, and texpVal must
1119 have the same type as tres.
1121 static void helper_ADC ( Int sz
,
1122 IRTemp tres
, IRTemp ta1
, IRTemp ta2
,
1123 /* info about optional store: */
1124 IRTemp taddr
, IRTemp texpVal
, Addr32 restart_point
)
1127 IRType ty
= szToITy(sz
);
1128 IRTemp oldc
= newTemp(Ity_I32
);
1129 IRTemp oldcn
= newTemp(ty
);
1130 IROp plus
= mkSizedOp(ty
, Iop_Add8
);
1131 IROp
xor = mkSizedOp(ty
, Iop_Xor8
);
1133 vassert(typeOfIRTemp(irsb
->tyenv
, tres
) == ty
);
1134 vassert(sz
== 1 || sz
== 2 || sz
== 4);
1135 thunkOp
= sz
==4 ? X86G_CC_OP_ADCL
1136 : (sz
==2 ? X86G_CC_OP_ADCW
: X86G_CC_OP_ADCB
);
1138 /* oldc = old carry flag, 0 or 1 */
1139 assign( oldc
, binop(Iop_And32
,
1140 mk_x86g_calculate_eflags_c(),
1143 assign( oldcn
, narrowTo(ty
, mkexpr(oldc
)) );
1145 assign( tres
, binop(plus
,
1146 binop(plus
,mkexpr(ta1
),mkexpr(ta2
)),
1149 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
1150 start of this function. */
1151 if (taddr
!= IRTemp_INVALID
) {
1152 if (texpVal
== IRTemp_INVALID
) {
1153 vassert(restart_point
== 0);
1154 storeLE( mkexpr(taddr
), mkexpr(tres
) );
1156 vassert(typeOfIRTemp(irsb
->tyenv
, texpVal
) == ty
);
1157 /* .. and hence 'texpVal' has the same type as 'tres'. */
1158 casLE( mkexpr(taddr
),
1159 mkexpr(texpVal
), mkexpr(tres
), restart_point
);
1163 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(thunkOp
) ) );
1164 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto32(mkexpr(ta1
)) ));
1165 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto32(binop(xor, mkexpr(ta2
),
1166 mkexpr(oldcn
)) )) );
1167 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkexpr(oldc
) ) );
1171 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
1172 appropriately. As with helper_ADC, possibly generate a store of
1173 the result -- see comments on helper_ADC for details.
1175 static void helper_SBB ( Int sz
,
1176 IRTemp tres
, IRTemp ta1
, IRTemp ta2
,
1177 /* info about optional store: */
1178 IRTemp taddr
, IRTemp texpVal
, Addr32 restart_point
)
1181 IRType ty
= szToITy(sz
);
1182 IRTemp oldc
= newTemp(Ity_I32
);
1183 IRTemp oldcn
= newTemp(ty
);
1184 IROp minus
= mkSizedOp(ty
, Iop_Sub8
);
1185 IROp
xor = mkSizedOp(ty
, Iop_Xor8
);
1187 vassert(typeOfIRTemp(irsb
->tyenv
, tres
) == ty
);
1188 vassert(sz
== 1 || sz
== 2 || sz
== 4);
1189 thunkOp
= sz
==4 ? X86G_CC_OP_SBBL
1190 : (sz
==2 ? X86G_CC_OP_SBBW
: X86G_CC_OP_SBBB
);
1192 /* oldc = old carry flag, 0 or 1 */
1193 assign( oldc
, binop(Iop_And32
,
1194 mk_x86g_calculate_eflags_c(),
1197 assign( oldcn
, narrowTo(ty
, mkexpr(oldc
)) );
1199 assign( tres
, binop(minus
,
1200 binop(minus
,mkexpr(ta1
),mkexpr(ta2
)),
1203 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
1204 start of this function. */
1205 if (taddr
!= IRTemp_INVALID
) {
1206 if (texpVal
== IRTemp_INVALID
) {
1207 vassert(restart_point
== 0);
1208 storeLE( mkexpr(taddr
), mkexpr(tres
) );
1210 vassert(typeOfIRTemp(irsb
->tyenv
, texpVal
) == ty
);
1211 /* .. and hence 'texpVal' has the same type as 'tres'. */
1212 casLE( mkexpr(taddr
),
1213 mkexpr(texpVal
), mkexpr(tres
), restart_point
);
1217 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(thunkOp
) ) );
1218 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto32(mkexpr(ta1
) )) );
1219 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto32(binop(xor, mkexpr(ta2
),
1220 mkexpr(oldcn
)) )) );
1221 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkexpr(oldc
) ) );
1225 /* -------------- Helpers for disassembly printing. -------------- */
1227 static const HChar
* nameGrp1 ( Int opc_aux
)
1229 static const HChar
* grp1_names
[8]
1230 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
1231 if (opc_aux
< 0 || opc_aux
> 7) vpanic("nameGrp1(x86)");
1232 return grp1_names
[opc_aux
];
1235 static const HChar
* nameGrp2 ( Int opc_aux
)
1237 static const HChar
* grp2_names
[8]
1238 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
1239 if (opc_aux
< 0 || opc_aux
> 7) vpanic("nameGrp2(x86)");
1240 return grp2_names
[opc_aux
];
1243 static const HChar
* nameGrp4 ( Int opc_aux
)
1245 static const HChar
* grp4_names
[8]
1246 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
1247 if (opc_aux
< 0 || opc_aux
> 1) vpanic("nameGrp4(x86)");
1248 return grp4_names
[opc_aux
];
1251 static const HChar
* nameGrp5 ( Int opc_aux
)
1253 static const HChar
* grp5_names
[8]
1254 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
1255 if (opc_aux
< 0 || opc_aux
> 6) vpanic("nameGrp5(x86)");
1256 return grp5_names
[opc_aux
];
1259 static const HChar
* nameGrp8 ( Int opc_aux
)
1261 static const HChar
* grp8_names
[8]
1262 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
1263 if (opc_aux
< 4 || opc_aux
> 7) vpanic("nameGrp8(x86)");
1264 return grp8_names
[opc_aux
];
1267 static const HChar
* nameIReg ( Int size
, Int reg
)
1269 static const HChar
* ireg32_names
[8]
1270 = { "%eax", "%ecx", "%edx", "%ebx",
1271 "%esp", "%ebp", "%esi", "%edi" };
1272 static const HChar
* ireg16_names
[8]
1273 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" };
1274 static const HChar
* ireg8_names
[8]
1275 = { "%al", "%cl", "%dl", "%bl",
1276 "%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" };
1277 if (reg
< 0 || reg
> 7) goto bad
;
1279 case 4: return ireg32_names
[reg
];
1280 case 2: return ireg16_names
[reg
];
1281 case 1: return ireg8_names
[reg
];
1284 vpanic("nameIReg(X86)");
1285 return NULL
; /*notreached*/
1288 static const HChar
* nameSReg ( UInt sreg
)
1291 case R_ES
: return "%es";
1292 case R_CS
: return "%cs";
1293 case R_SS
: return "%ss";
1294 case R_DS
: return "%ds";
1295 case R_FS
: return "%fs";
1296 case R_GS
: return "%gs";
1297 default: vpanic("nameSReg(x86)");
1301 static const HChar
* nameMMXReg ( Int mmxreg
)
1303 static const HChar
* mmx_names
[8]
1304 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
1305 if (mmxreg
< 0 || mmxreg
> 7) vpanic("nameMMXReg(x86,guest)");
1306 return mmx_names
[mmxreg
];
1309 static const HChar
* nameXMMReg ( Int xmmreg
)
1311 static const HChar
* xmm_names
[8]
1312 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
1313 "%xmm4", "%xmm5", "%xmm6", "%xmm7" };
1314 if (xmmreg
< 0 || xmmreg
> 7) vpanic("name_of_xmm_reg");
1315 return xmm_names
[xmmreg
];
1318 static const HChar
* nameMMXGran ( Int gran
)
1325 default: vpanic("nameMMXGran(x86,guest)");
1329 static HChar
nameISize ( Int size
)
1335 default: vpanic("nameISize(x86)");
1340 /*------------------------------------------------------------*/
1341 /*--- JMP helpers ---*/
1342 /*------------------------------------------------------------*/
1344 static void jmp_lit( /*MOD*/DisResult
* dres
,
1345 IRJumpKind kind
, Addr32 d32
)
1347 vassert(dres
->whatNext
== Dis_Continue
);
1348 vassert(dres
->len
== 0);
1349 vassert(dres
->jk_StopHere
== Ijk_INVALID
);
1350 dres
->whatNext
= Dis_StopHere
;
1351 dres
->jk_StopHere
= kind
;
1352 stmt( IRStmt_Put( OFFB_EIP
, mkU32(d32
) ) );
1355 static void jmp_treg( /*MOD*/DisResult
* dres
,
1356 IRJumpKind kind
, IRTemp t
)
1358 vassert(dres
->whatNext
== Dis_Continue
);
1359 vassert(dres
->len
== 0);
1360 vassert(dres
->jk_StopHere
== Ijk_INVALID
);
1361 dres
->whatNext
= Dis_StopHere
;
1362 dres
->jk_StopHere
= kind
;
1363 stmt( IRStmt_Put( OFFB_EIP
, mkexpr(t
) ) );
1367 void jcc_01( /*MOD*/DisResult
* dres
,
1368 X86Condcode cond
, Addr32 d32_false
, Addr32 d32_true
)
1371 X86Condcode condPos
;
1372 vassert(dres
->whatNext
== Dis_Continue
);
1373 vassert(dres
->len
== 0);
1374 vassert(dres
->jk_StopHere
== Ijk_INVALID
);
1375 dres
->whatNext
= Dis_StopHere
;
1376 dres
->jk_StopHere
= Ijk_Boring
;
1377 condPos
= positiveIse_X86Condcode ( cond
, &invert
);
1379 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos
),
1381 IRConst_U32(d32_false
),
1383 stmt( IRStmt_Put( OFFB_EIP
, mkU32(d32_true
) ) );
1385 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos
),
1387 IRConst_U32(d32_true
),
1389 stmt( IRStmt_Put( OFFB_EIP
, mkU32(d32_false
) ) );
1394 /*------------------------------------------------------------*/
1395 /*--- Disassembling addressing modes ---*/
1396 /*------------------------------------------------------------*/
1399 const HChar
* sorbTxt ( UChar sorb
)
1402 case 0: return ""; /* no override */
1403 case 0x3E: return "%ds";
1404 case 0x26: return "%es:";
1405 case 0x64: return "%fs:";
1406 case 0x65: return "%gs:";
1407 case 0x36: return "%ss:";
1408 default: vpanic("sorbTxt(x86,guest)");
1413 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
1414 linear address by adding any required segment override as indicated
1417 IRExpr
* handleSegOverride ( UChar sorb
, IRExpr
* virtual )
1421 IRTemp ldt_ptr
, gdt_ptr
, seg_selector
, r64
;
1424 /* the common case - no override */
1428 case 0x3E: sreg
= R_DS
; break;
1429 case 0x26: sreg
= R_ES
; break;
1430 case 0x64: sreg
= R_FS
; break;
1431 case 0x65: sreg
= R_GS
; break;
1432 case 0x36: sreg
= R_SS
; break;
1433 default: vpanic("handleSegOverride(x86,guest)");
1436 hWordTy
= sizeof(HWord
)==4 ? Ity_I32
: Ity_I64
;
1438 seg_selector
= newTemp(Ity_I32
);
1439 ldt_ptr
= newTemp(hWordTy
);
1440 gdt_ptr
= newTemp(hWordTy
);
1441 r64
= newTemp(Ity_I64
);
1443 assign( seg_selector
, unop(Iop_16Uto32
, getSReg(sreg
)) );
1444 assign( ldt_ptr
, IRExpr_Get( OFFB_LDT
, hWordTy
));
1445 assign( gdt_ptr
, IRExpr_Get( OFFB_GDT
, hWordTy
));
1448 Call this to do the translation and limit checks:
1449 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
1450 UInt seg_selector, UInt virtual_addr )
1457 "x86g_use_seg_selector",
1458 &x86g_use_seg_selector
,
1459 mkIRExprVec_4( mkexpr(ldt_ptr
), mkexpr(gdt_ptr
),
1460 mkexpr(seg_selector
), virtual)
1464 /* If the high 32 of the result are non-zero, there was a
1465 failure in address translation. In which case, make a
1470 binop(Iop_CmpNE32
, unop(Iop_64HIto32
, mkexpr(r64
)), mkU32(0)),
1472 IRConst_U32( guest_EIP_curr_instr
),
1477 /* otherwise, here's the translated result. */
1478 return unop(Iop_64to32
, mkexpr(r64
));
1482 /* Generate IR to calculate an address indicated by a ModRM and
1483 following SIB bytes. The expression, and the number of bytes in
1484 the address mode, are returned. Note that this fn should not be
1485 called if the R/M part of the address denotes a register instead of
1486 memory. If print_codegen is true, text of the addressing mode is
1489 The computed address is stored in a new tempreg, and the
1490 identity of the tempreg is returned. */
1492 static IRTemp
disAMode_copy2tmp ( IRExpr
* addr32
)
1494 IRTemp tmp
= newTemp(Ity_I32
);
1495 assign( tmp
, addr32
);
1500 IRTemp
disAMode ( Int
* len
, UChar sorb
, Int delta
, HChar
* buf
)
1502 UChar mod_reg_rm
= getIByte(delta
);
1507 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
1508 jump table seems a bit excessive.
1510 mod_reg_rm
&= 0xC7; /* is now XX000YYY */
1511 mod_reg_rm
= toUChar(mod_reg_rm
| (mod_reg_rm
>> 3));
1512 /* is now XX0XXYYY */
1513 mod_reg_rm
&= 0x1F; /* is now 000XXYYY */
1514 switch (mod_reg_rm
) {
1516 /* (%eax) .. (%edi), not including (%esp) or (%ebp).
1519 case 0x00: case 0x01: case 0x02: case 0x03:
1520 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
1521 { UChar rm
= mod_reg_rm
;
1522 DIS(buf
, "%s(%s)", sorbTxt(sorb
), nameIReg(4,rm
));
1524 return disAMode_copy2tmp(
1525 handleSegOverride(sorb
, getIReg(4,rm
)));
1528 /* d8(%eax) ... d8(%edi), not including d8(%esp)
1529 --> GET %reg, t ; ADDL d8, t
1531 case 0x08: case 0x09: case 0x0A: case 0x0B:
1532 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
1533 { UChar rm
= toUChar(mod_reg_rm
& 7);
1534 UInt d
= getSDisp8(delta
);
1535 DIS(buf
, "%s%d(%s)", sorbTxt(sorb
), (Int
)d
, nameIReg(4,rm
));
1537 return disAMode_copy2tmp(
1538 handleSegOverride(sorb
,
1539 binop(Iop_Add32
,getIReg(4,rm
),mkU32(d
))));
1542 /* d32(%eax) ... d32(%edi), not including d32(%esp)
1543 --> GET %reg, t ; ADDL d8, t
1545 case 0x10: case 0x11: case 0x12: case 0x13:
1546 /* ! 14 */ case 0x15: case 0x16: case 0x17:
1547 { UChar rm
= toUChar(mod_reg_rm
& 7);
1548 UInt d
= getUDisp32(delta
);
1549 DIS(buf
, "%s0x%x(%s)", sorbTxt(sorb
), d
, nameIReg(4,rm
));
1551 return disAMode_copy2tmp(
1552 handleSegOverride(sorb
,
1553 binop(Iop_Add32
,getIReg(4,rm
),mkU32(d
))));
1556 /* a register, %eax .. %edi. This shouldn't happen. */
1557 case 0x18: case 0x19: case 0x1A: case 0x1B:
1558 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
1559 vpanic("disAMode(x86): not an addr!");
1561 /* a 32-bit literal address
1565 { UInt d
= getUDisp32(delta
);
1567 DIS(buf
, "%s(0x%x)", sorbTxt(sorb
), d
);
1568 return disAMode_copy2tmp(
1569 handleSegOverride(sorb
, mkU32(d
)));
1573 /* SIB, with no displacement. Special cases:
1574 -- %esp cannot act as an index value.
1575 If index_r indicates %esp, zero is used for the index.
1576 -- when mod is zero and base indicates EBP, base is instead
1578 It's all madness, I tell you. Extract %index, %base and
1579 scale from the SIB byte. The value denoted is then:
1580 | %index == %ESP && %base == %EBP
1581 = d32 following SIB byte
1582 | %index == %ESP && %base != %EBP
1584 | %index != %ESP && %base == %EBP
1585 = d32 following SIB byte + (%index << scale)
1586 | %index != %ESP && %base != %ESP
1587 = %base + (%index << scale)
1589 What happens to the souls of CPU architects who dream up such
1590 horrendous schemes, do you suppose?
1592 UChar sib
= getIByte(delta
);
1593 UChar scale
= toUChar((sib
>> 6) & 3);
1594 UChar index_r
= toUChar((sib
>> 3) & 7);
1595 UChar base_r
= toUChar(sib
& 7);
1598 if (index_r
!= R_ESP
&& base_r
!= R_EBP
) {
1599 DIS(buf
, "%s(%s,%s,%d)", sorbTxt(sorb
),
1600 nameIReg(4,base_r
), nameIReg(4,index_r
), 1<<scale
);
1604 handleSegOverride(sorb
,
1607 binop(Iop_Shl32
, getIReg(4,index_r
),
1611 if (index_r
!= R_ESP
&& base_r
== R_EBP
) {
1612 UInt d
= getUDisp32(delta
);
1613 DIS(buf
, "%s0x%x(,%s,%d)", sorbTxt(sorb
), d
,
1614 nameIReg(4,index_r
), 1<<scale
);
1618 handleSegOverride(sorb
,
1620 binop(Iop_Shl32
, getIReg(4,index_r
), mkU8(scale
)),
1624 if (index_r
== R_ESP
&& base_r
!= R_EBP
) {
1625 DIS(buf
, "%s(%s,,)", sorbTxt(sorb
), nameIReg(4,base_r
));
1627 return disAMode_copy2tmp(
1628 handleSegOverride(sorb
, getIReg(4,base_r
)));
1631 if (index_r
== R_ESP
&& base_r
== R_EBP
) {
1632 UInt d
= getUDisp32(delta
);
1633 DIS(buf
, "%s0x%x(,,)", sorbTxt(sorb
), d
);
1635 return disAMode_copy2tmp(
1636 handleSegOverride(sorb
, mkU32(d
)));
1642 /* SIB, with 8-bit displacement. Special cases:
1643 -- %esp cannot act as an index value.
1644 If index_r indicates %esp, zero is used for the index.
1649 = d8 + %base + (%index << scale)
1652 UChar sib
= getIByte(delta
);
1653 UChar scale
= toUChar((sib
>> 6) & 3);
1654 UChar index_r
= toUChar((sib
>> 3) & 7);
1655 UChar base_r
= toUChar(sib
& 7);
1656 UInt d
= getSDisp8(delta
+1);
1658 if (index_r
== R_ESP
) {
1659 DIS(buf
, "%s%d(%s,,)", sorbTxt(sorb
),
1660 (Int
)d
, nameIReg(4,base_r
));
1662 return disAMode_copy2tmp(
1663 handleSegOverride(sorb
,
1664 binop(Iop_Add32
, getIReg(4,base_r
), mkU32(d
)) ));
1666 DIS(buf
, "%s%d(%s,%s,%d)", sorbTxt(sorb
), (Int
)d
,
1667 nameIReg(4,base_r
), nameIReg(4,index_r
), 1<<scale
);
1671 handleSegOverride(sorb
,
1676 getIReg(4,index_r
), mkU8(scale
))),
1683 /* SIB, with 32-bit displacement. Special cases:
1684 -- %esp cannot act as an index value.
1685 If index_r indicates %esp, zero is used for the index.
1690 = d32 + %base + (%index << scale)
1693 UChar sib
= getIByte(delta
);
1694 UChar scale
= toUChar((sib
>> 6) & 3);
1695 UChar index_r
= toUChar((sib
>> 3) & 7);
1696 UChar base_r
= toUChar(sib
& 7);
1697 UInt d
= getUDisp32(delta
+1);
1699 if (index_r
== R_ESP
) {
1700 DIS(buf
, "%s%d(%s,,)", sorbTxt(sorb
),
1701 (Int
)d
, nameIReg(4,base_r
));
1703 return disAMode_copy2tmp(
1704 handleSegOverride(sorb
,
1705 binop(Iop_Add32
, getIReg(4,base_r
), mkU32(d
)) ));
1707 DIS(buf
, "%s%d(%s,%s,%d)", sorbTxt(sorb
), (Int
)d
,
1708 nameIReg(4,base_r
), nameIReg(4,index_r
), 1<<scale
);
1712 handleSegOverride(sorb
,
1717 getIReg(4,index_r
), mkU8(scale
))),
1725 vpanic("disAMode(x86)");
1726 return 0; /*notreached*/
1731 /* Figure out the number of (insn-stream) bytes constituting the amode
1732 beginning at delta. Is useful for getting hold of literals beyond
1733 the end of the amode before it has been disassembled. */
1735 static UInt
lengthAMode ( Int delta
)
1737 UChar mod_reg_rm
= getIByte(delta
); delta
++;
1739 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
1740 jump table seems a bit excessive.
1742 mod_reg_rm
&= 0xC7; /* is now XX000YYY */
1743 mod_reg_rm
= toUChar(mod_reg_rm
| (mod_reg_rm
>> 3));
1744 /* is now XX0XXYYY */
1745 mod_reg_rm
&= 0x1F; /* is now 000XXYYY */
1746 switch (mod_reg_rm
) {
1748 /* (%eax) .. (%edi), not including (%esp) or (%ebp). */
1749 case 0x00: case 0x01: case 0x02: case 0x03:
1750 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
1753 /* d8(%eax) ... d8(%edi), not including d8(%esp). */
1754 case 0x08: case 0x09: case 0x0A: case 0x0B:
1755 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
1758 /* d32(%eax) ... d32(%edi), not including d32(%esp). */
1759 case 0x10: case 0x11: case 0x12: case 0x13:
1760 /* ! 14 */ case 0x15: case 0x16: case 0x17:
1763 /* a register, %eax .. %edi. (Not an addr, but still handled.) */
1764 case 0x18: case 0x19: case 0x1A: case 0x1B:
1765 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
1768 /* a 32-bit literal address. */
1769 case 0x05: return 5;
1771 /* SIB, no displacement. */
1773 UChar sib
= getIByte(delta
);
1774 UChar base_r
= toUChar(sib
& 7);
1775 if (base_r
== R_EBP
) return 6; else return 2;
1777 /* SIB, with 8-bit displacement. */
1778 case 0x0C: return 3;
1780 /* SIB, with 32-bit displacement. */
1781 case 0x14: return 6;
1784 vpanic("lengthAMode");
1785 return 0; /*notreached*/
1789 /*------------------------------------------------------------*/
1790 /*--- Disassembling common idioms ---*/
1791 /*------------------------------------------------------------*/
1793 /* Handle binary integer instructions of the form
1796 Is passed the a ptr to the modRM byte, the actual operation, and the
1797 data size. Returns the address advanced completely over this
1800 E(src) is reg-or-mem
1803 If E is reg, --> GET %G, tmp
1807 If E is mem and OP is not reversible,
1808 --> (getAddr E) -> tmpa
1814 If E is mem and OP is reversible
1815 --> (getAddr E) -> tmpa
1821 UInt
dis_op2_E_G ( UChar sorb
,
1827 const HChar
* t_x86opc
)
1831 IRType ty
= szToITy(size
);
1832 IRTemp dst1
= newTemp(ty
);
1833 IRTemp src
= newTemp(ty
);
1834 IRTemp dst0
= newTemp(ty
);
1835 UChar rm
= getUChar(delta0
);
1836 IRTemp addr
= IRTemp_INVALID
;
1838 /* addSubCarry == True indicates the intended operation is
1839 add-with-carry or subtract-with-borrow. */
1841 vassert(op8
== Iop_Add8
|| op8
== Iop_Sub8
);
1845 if (epartIsReg(rm
)) {
1846 /* Specially handle XOR reg,reg, because that doesn't really
1847 depend on reg, and doing the obvious thing potentially
1848 generates a spurious value check failure due to the bogus
1849 dependency. Ditto SBB reg,reg. */
1850 if ((op8
== Iop_Xor8
|| (op8
== Iop_Sub8
&& addSubCarry
))
1851 && gregOfRM(rm
) == eregOfRM(rm
)) {
1852 putIReg(size
, gregOfRM(rm
), mkU(ty
,0));
1854 assign( dst0
, getIReg(size
,gregOfRM(rm
)) );
1855 assign( src
, getIReg(size
,eregOfRM(rm
)) );
1857 if (addSubCarry
&& op8
== Iop_Add8
) {
1858 helper_ADC( size
, dst1
, dst0
, src
,
1859 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
1860 putIReg(size
, gregOfRM(rm
), mkexpr(dst1
));
1862 if (addSubCarry
&& op8
== Iop_Sub8
) {
1863 helper_SBB( size
, dst1
, dst0
, src
,
1864 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
1865 putIReg(size
, gregOfRM(rm
), mkexpr(dst1
));
1867 assign( dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
1869 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
1871 setFlags_DEP1(op8
, dst1
, ty
);
1873 putIReg(size
, gregOfRM(rm
), mkexpr(dst1
));
1876 DIP("%s%c %s,%s\n", t_x86opc
, nameISize(size
),
1877 nameIReg(size
,eregOfRM(rm
)),
1878 nameIReg(size
,gregOfRM(rm
)));
1881 /* E refers to memory */
1882 addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
1883 assign( dst0
, getIReg(size
,gregOfRM(rm
)) );
1884 assign( src
, loadLE(szToITy(size
), mkexpr(addr
)) );
1886 if (addSubCarry
&& op8
== Iop_Add8
) {
1887 helper_ADC( size
, dst1
, dst0
, src
,
1888 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
1889 putIReg(size
, gregOfRM(rm
), mkexpr(dst1
));
1891 if (addSubCarry
&& op8
== Iop_Sub8
) {
1892 helper_SBB( size
, dst1
, dst0
, src
,
1893 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
1894 putIReg(size
, gregOfRM(rm
), mkexpr(dst1
));
1896 assign( dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
1898 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
1900 setFlags_DEP1(op8
, dst1
, ty
);
1902 putIReg(size
, gregOfRM(rm
), mkexpr(dst1
));
1905 DIP("%s%c %s,%s\n", t_x86opc
, nameISize(size
),
1906 dis_buf
,nameIReg(size
,gregOfRM(rm
)));
/* Handle binary integer instructions of the form
      op G, E  meaning
      op reg, reg-or-mem
   Is passed a pointer to the modRM byte, the actual operation, and the
   data size.  Returns the address advanced completely over this
   instruction.

   G(src) is reg.
   E(dst) is reg-or-mem

   If E is reg, -->    GET %E,  tmp
                       OP %G,   tmp
                       PUT tmp, %E
 
   If E is mem, -->    (getAddr E) -> tmpa
                       LD tmpa, tmpv
                       OP %G, tmpv
                       ST tmpv, tmpa
*/
static
UInt dis_op2_G_E ( UChar       sorb,
                   Bool        locked,
                   Bool        addSubCarry, /* True indicates ADC/SBB */
                   IROp        op8, 
                   Bool        keep,
                   Int         size, 
                   Int         delta0,
                   const HChar* t_x86opc )
{
   HChar   dis_buf[50];
   Int     len;
   IRType  ty   = szToITy(size);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   UChar   rm   = getIByte(delta0);
   IRTemp  addr = IRTemp_INVALID;

   /* addSubCarry == True indicates the intended operation is
      add-with-carry or subtract-with-borrow. */
   if (addSubCarry) {
      vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
      vassert(keep);
   }

   if (epartIsReg(rm)) {
      /* Specially handle XOR reg,reg, because that doesn't really
         depend on reg, and doing the obvious thing potentially
         generates a spurious value check failure due to the bogus
         dependency.  Ditto SBB reg,reg.*/
      if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
          && gregOfRM(rm) == eregOfRM(rm)) {
         putIReg(size, eregOfRM(rm), mkU(ty,0));
      }

      assign(dst0, getIReg(size,eregOfRM(rm)));
      assign(src,  getIReg(size,gregOfRM(rm)));

      if (addSubCarry && op8 == Iop_Add8) {
         helper_ADC( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, eregOfRM(rm), mkexpr(dst1));
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         helper_SBB( size, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
         putIReg(size, eregOfRM(rm), mkexpr(dst1));
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
         if (keep)
            putIReg(size, eregOfRM(rm), mkexpr(dst1));
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size), 
                          nameIReg(size,gregOfRM(rm)),
                          nameIReg(size,eregOfRM(rm)));
      return 1+delta0;
   }

   /* E refers to memory */    
   {
      addr = disAMode ( &len, sorb, delta0, dis_buf);
      assign(dst0, loadLE(ty,mkexpr(addr)));
      assign(src,  getIReg(size,gregOfRM(rm)));

      if (addSubCarry && op8 == Iop_Add8) {
         if (locked) {
            /* cas-style store */
            helper_ADC( size, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
         } else {
            /* normal store */
            helper_ADC( size, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else
      if (addSubCarry && op8 == Iop_Sub8) {
         if (locked) {
            /* cas-style store */
            helper_SBB( size, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
         } else {
            /* normal store */
            helper_SBB( size, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (keep) {
            if (locked) {
               if (0) vex_printf("locked case\n" );
               casLE( mkexpr(addr),
                      mkexpr(dst0)/*expval*/, 
                      mkexpr(dst1)/*newval*/, guest_EIP_curr_instr );
            } else {
               if (0) vex_printf("nonlocked case\n");
               storeLE(mkexpr(addr), mkexpr(dst1));
            }
         }
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      DIP("%s%c %s,%s\n", t_x86opc, nameISize(size), 
                          nameIReg(size,gregOfRM(rm)), dis_buf);
      return len+delta0;
   }
}
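
/* Sketch of the locked case (illustrative; temp names are invented):
   for "lock addl %eax,(%ebx)" the store above becomes, roughly,

      t_dst0 = LDle:I32(t_addr)
      t_dst1 = Add32(t_dst0, t_src)
      CASle (t_addr :: t_dst0 -> t_dst1)

   where a failed compare-and-swap side-exits to guest_EIP_curr_instr,
   re-running the whole insn rather than losing a racing update. */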
/* Handle move instructions of the form
      mov E, G  meaning
      mov reg-or-mem, reg
   Is passed a pointer to the modRM byte, and the data size.  Returns
   the address advanced completely over this instruction.

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %E, tmpv
                       PUT tmpv, %G
 
   If E is mem  -->    (getAddr E) -> tmpa
                       LD tmpa, tmpb
                       PUT tmpb, %G
*/
static
UInt dis_mov_E_G ( UChar       sorb,
                   Int         size, 
                   Int         delta0 )
{
   Int len;
   UChar rm = getIByte(delta0);
   HChar dis_buf[50];

   if (epartIsReg(rm)) {
      putIReg(size, gregOfRM(rm), getIReg(size, eregOfRM(rm)));
      DIP("mov%c %s,%s\n", nameISize(size), 
                           nameIReg(size,eregOfRM(rm)),
                           nameIReg(size,gregOfRM(rm)));
      return 1+delta0;
   }

   /* E refers to memory */    
   {
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
      putIReg(size, gregOfRM(rm), loadLE(szToITy(size), mkexpr(addr)));
      DIP("mov%c %s,%s\n", nameISize(size), 
                           dis_buf, nameIReg(size,gregOfRM(rm)));
      return delta0+len;
   }
}
/* Handle move instructions of the form
      mov G, E  meaning
      mov reg, reg-or-mem
   Is passed a pointer to the modRM byte, and the data size.  Returns
   the address advanced completely over this instruction.

   G(src) is reg.
   E(dst) is reg-or-mem

   If E is reg, -->    GET %G, tmp
                       PUT tmp, %E
 
   If E is mem, -->    (getAddr E) -> tmpa
                       GET %G, tmpv
                       ST tmpv, tmpa 
*/
static
UInt dis_mov_G_E ( UChar       sorb,
                   Int         size, 
                   Int         delta0 )
{
   Int len;
   UChar rm = getIByte(delta0);
   HChar dis_buf[50];

   if (epartIsReg(rm)) {
      putIReg(size, eregOfRM(rm), getIReg(size, gregOfRM(rm)));
      DIP("mov%c %s,%s\n", nameISize(size), 
                           nameIReg(size,gregOfRM(rm)),
                           nameIReg(size,eregOfRM(rm)));
      return 1+delta0;
   }

   /* E refers to memory */    
   {
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf);
      storeLE( mkexpr(addr), getIReg(size, gregOfRM(rm)) );
      DIP("mov%c %s,%s\n", nameISize(size), 
                           nameIReg(size,gregOfRM(rm)), dis_buf);
      return len+delta0;
   }
}
/* op $immediate, AL/AX/EAX. */
static
UInt dis_op_imm_A ( Int    size,
                    Bool   carrying,
                    IROp   op8,
                    Bool   keep,
                    Int    delta,
                    const HChar* t_x86opc )
{
   IRType ty   = szToITy(size);
   IRTemp dst0 = newTemp(ty);
   IRTemp src  = newTemp(ty);
   IRTemp dst1 = newTemp(ty);
   UInt lit    = getUDisp(size,delta);
   assign(dst0, getIReg(size,R_EAX));
   assign(src,  mkU(ty,lit));

   if (isAddSub(op8) && !carrying) {
      assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
      setFlags_DEP1_DEP2(op8, dst0, src, ty);
   } 
   else
   if (isLogic(op8)) {
      vassert(!carrying);
      assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
      setFlags_DEP1(op8, dst1, ty);
   } 
   else
   if (op8 == Iop_Add8 && carrying) {
      helper_ADC( size, dst1, dst0, src,
                  /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
   }
   else
   if (op8 == Iop_Sub8 && carrying) {
      helper_SBB( size, dst1, dst0, src,
                  /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
   }
   else
      vpanic("dis_op_imm_A(x86,guest)");

   if (keep)
      putIReg(size, R_EAX, mkexpr(dst1));

   DIP("%s%c $0x%x, %s\n", t_x86opc, nameISize(size), 
                           lit, nameIReg(size,R_EAX));
   return delta+size;
}
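
/* For example (illustrative only): "adcl $0x10, %eax" takes the
   helper_ADC path above with dst0 = old EAX and src = 0x10, with no
   store temps supplied, so only EAX and the flag thunk are updated. */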
/* Sign- and Zero-extending moves. */
static
UInt dis_movx_E_G ( UChar      sorb,
                    Int delta, Int szs, Int szd, Bool sign_extend )
{
   UChar rm = getIByte(delta);
   if (epartIsReg(rm)) {
      if (szd == szs) {
         // mutant case.  See #250799
         putIReg(szd, gregOfRM(rm),
                      getIReg(szs,eregOfRM(rm)));
      } else {
         // normal case
         putIReg(szd, gregOfRM(rm),
                      unop(mkWidenOp(szs,szd,sign_extend), 
                           getIReg(szs,eregOfRM(rm))));
      }
      DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
                               nameISize(szs), nameISize(szd),
                               nameIReg(szs,eregOfRM(rm)),
                               nameIReg(szd,gregOfRM(rm)));
      return 1+delta;
   }

   /* E refers to memory */    
   {
      Int    len;
      HChar  dis_buf[50];
      IRTemp addr = disAMode ( &len, sorb, delta, dis_buf );
      if (szd == szs) {
         // mutant case.  See #250799
         putIReg(szd, gregOfRM(rm),
                      loadLE(szToITy(szs),mkexpr(addr)));
      } else {
         // normal case
         putIReg(szd, gregOfRM(rm),
                      unop(mkWidenOp(szs,szd,sign_extend), 
                           loadLE(szToITy(szs),mkexpr(addr))));
      }
      DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
                               nameISize(szs), nameISize(szd),
                               dis_buf, nameIReg(szd,gregOfRM(rm)));
      return len+delta;
   }
}
/* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 /
   16 / 8 bit quantity in the given IRTemp.  */
static
void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
{
   IROp   op    = signed_divide ? Iop_DivModS64to32 : Iop_DivModU64to32;
   IRTemp src64 = newTemp(Ity_I64);
   IRTemp dst64 = newTemp(Ity_I64);
   switch (sz) {
      case 4:
         assign( src64, binop(Iop_32HLto64, 
                              getIReg(4,R_EDX), getIReg(4,R_EAX)) );
         assign( dst64, binop(op, mkexpr(src64), mkexpr(t)) );
         putIReg( 4, R_EAX, unop(Iop_64to32,mkexpr(dst64)) );
         putIReg( 4, R_EDX, unop(Iop_64HIto32,mkexpr(dst64)) );
         break;
      case 2: {
         IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
         IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
         assign( src64, unop(widen3264,
                             binop(Iop_16HLto32,
                                   getIReg(2,R_EDX), getIReg(2,R_EAX))) );
         assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
         putIReg( 2, R_EAX, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
         putIReg( 2, R_EDX, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
         break;
      }
      case 1: {
         IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
         IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
         IROp widen816  = signed_divide ? Iop_8Sto16  : Iop_8Uto16;
         assign( src64, unop(widen3264, unop(widen1632, getIReg(2,R_EAX))) );
         assign( dst64, 
                 binop(op, mkexpr(src64), 
                           unop(widen1632, unop(widen816, mkexpr(t)))) );
         putIReg( 1, R_AL, unop(Iop_16to8, unop(Iop_32to16,
                           unop(Iop_64to32,mkexpr(dst64)))) );
         putIReg( 1, R_AH, unop(Iop_16to8, unop(Iop_32to16,
                           unop(Iop_64HIto32,mkexpr(dst64)))) );
         break;
      }
      default: vpanic("codegen_div(x86)");
   }
}
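
/* Worked example (illustrative): for "divw %cx", sz == 2, so src64 is
   the zero-extension of DX:AX (glued together by Iop_16HLto32) and the
   divisor is widened 16->32->64.  Iop_DivModU64to32 then yields a
   64-bit result carrying the quotient in bits 31:0 (written back to
   AX) and the remainder in bits 63:32 (written back to DX). */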
/* Group 1 extended opcodes. */
static
UInt dis_Grp1 ( UChar sorb, Bool locked,
                Int delta, UChar modrm, 
                Int am_sz, Int d_sz, Int sz, UInt d32 )
{
   Int     len;
   HChar   dis_buf[50];
   IRType  ty   = szToITy(sz);
   IRTemp  dst1 = newTemp(ty);
   IRTemp  src  = newTemp(ty);
   IRTemp  dst0 = newTemp(ty);
   IRTemp  addr = IRTemp_INVALID;
   IROp    op8  = Iop_INVALID;
   UInt    mask = sz==1 ? 0xFF : (sz==2 ? 0xFFFF : 0xFFFFFFFF);

   switch (gregOfRM(modrm)) {
      case 0: op8 = Iop_Add8; break;  case 1: op8 = Iop_Or8;  break;
      case 2: break;  // ADC
      case 3: break;  // SBB
      case 4: op8 = Iop_And8; break;  case 5: op8 = Iop_Sub8; break;
      case 6: op8 = Iop_Xor8; break;  case 7: op8 = Iop_Sub8; break;
      /*NOTREACHED*/
      default: vpanic("dis_Grp1: unhandled case");
   }

   if (epartIsReg(modrm)) {
      vassert(am_sz == 1);

      assign(dst0, getIReg(sz,eregOfRM(modrm)));
      assign(src,  mkU(ty,d32 & mask));

      if (gregOfRM(modrm) == 2 /* ADC */) {
         helper_ADC( sz, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
      } else 
      if (gregOfRM(modrm) == 3 /* SBB */) {
         helper_SBB( sz, dst1, dst0, src,
                     /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      if (gregOfRM(modrm) < 7)
         putIReg(sz, eregOfRM(modrm), mkexpr(dst1));

      delta += (am_sz + d_sz);
      DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz), d32, 
                              nameIReg(sz,eregOfRM(modrm)));
   } else {
      addr = disAMode ( &len, sorb, delta, dis_buf);

      assign(dst0, loadLE(ty,mkexpr(addr)));
      assign(src, mkU(ty,d32 & mask));

      if (gregOfRM(modrm) == 2 /* ADC */) {
         if (locked) {
            /* cas-style store */
            helper_ADC( sz, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
         } else {
            /* normal store */
            helper_ADC( sz, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else 
      if (gregOfRM(modrm) == 3 /* SBB */) {
         if (locked) {
            /* cas-style store */
            helper_SBB( sz, dst1, dst0, src,
                        /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
         } else {
            /* normal store */
            helper_SBB( sz, dst1, dst0, src,
                        /*store*/addr, IRTemp_INVALID, 0 );
         }
      } else {
         assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
         if (gregOfRM(modrm) < 7) {
            if (locked) {
               casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/, 
                                    mkexpr(dst1)/*newVal*/,
                                    guest_EIP_curr_instr );
            } else {
               storeLE(mkexpr(addr), mkexpr(dst1));
            }
         }
         if (isAddSub(op8))
            setFlags_DEP1_DEP2(op8, dst0, src, ty);
         else
            setFlags_DEP1(op8, dst1, ty);
      }

      delta += (len+d_sz);
      DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz),
                              d32, dis_buf);
   }
   return delta;
}
/* Group 2 extended opcodes.  shift_expr must be an 8-bit typed
   expression. */
static
UInt dis_Grp2 ( UChar sorb,
                Int delta, UChar modrm,
                Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
                const HChar* shift_expr_txt, Bool* decode_OK )
{
   /* delta on entry points at the modrm byte. */
   HChar  dis_buf[50];
   Int    len;
   Bool   isShift, isRotate, isRotateC;
   IRType ty   = szToITy(sz);
   IRTemp dst0 = newTemp(ty);
   IRTemp dst1 = newTemp(ty);
   IRTemp addr = IRTemp_INVALID;

   *decode_OK = True;

   vassert(sz == 1 || sz == 2 || sz == 4);

   /* Put value to shift/rotate in dst0. */
   if (epartIsReg(modrm)) {
      assign(dst0, getIReg(sz, eregOfRM(modrm)));
      delta += (am_sz + d_sz);
   } else {
      addr = disAMode ( &len, sorb, delta, dis_buf);
      assign(dst0, loadLE(ty,mkexpr(addr)));
      delta += len + d_sz;
   }

   isShift = False;
   switch (gregOfRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }

   isRotate = False;
   switch (gregOfRM(modrm)) { case 0: case 1: isRotate = True; }

   isRotateC = False;
   switch (gregOfRM(modrm)) { case 2: case 3: isRotateC = True; }

   if (!isShift && !isRotate && !isRotateC) {
      /*NOTREACHED*/
      vpanic("dis_Grp2(Reg): unhandled case(x86)");
   }

   if (isRotateC) {
      /* call a helper; these insns are so ridiculous they do not
         deserve better */
      Bool     left = toBool(gregOfRM(modrm) == 2);
      IRTemp   r64  = newTemp(Ity_I64);
      IRExpr** args
         = mkIRExprVec_4( widenUto32(mkexpr(dst0)), /* thing to rotate */
                          widenUto32(shift_expr),   /* rotate amount */
                          widenUto32(mk_x86g_calculate_eflags_all()),
                          mkU32(sz) );
      assign( r64, mkIRExprCCall(
                      Ity_I64, 
                      0/*regparm*/, 
                      left ? "x86g_calculate_RCL" : "x86g_calculate_RCR",
                      left ? &x86g_calculate_RCL  : &x86g_calculate_RCR,
                      args
                   )
            );
      /* new eflags in hi half r64; new value in lo half r64 */
      assign( dst1, narrowTo(ty, unop(Iop_64to32, mkexpr(r64))) );
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, unop(Iop_64HIto32, mkexpr(r64)) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      /* Set NDEP even though it isn't used.  This makes redundant-PUT
         elimination of previous stores to this field work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
   }

   else
   if (isShift) {

      IRTemp pre32     = newTemp(Ity_I32);
      IRTemp res32     = newTemp(Ity_I32);
      IRTemp res32ss   = newTemp(Ity_I32);
      IRTemp shift_amt = newTemp(Ity_I8);
      IROp   op32;

      switch (gregOfRM(modrm)) { 
         case 4: op32 = Iop_Shl32; break;
         case 5: op32 = Iop_Shr32; break;
         case 6: op32 = Iop_Shl32; break;
         case 7: op32 = Iop_Sar32; break;
         /*NOTREACHED*/
         default: vpanic("dis_Grp2:shift"); break;
      }

      /* Widen the value to be shifted to 32 bits, do the shift, and
         narrow back down.  This seems surprisingly long-winded, but
         unfortunately the Intel semantics requires that 8/16-bit
         shifts give defined results for shift values all the way up
         to 31, and this seems the simplest way to do it.  It has the
         advantage that the only IR level shifts generated are of 32
         bit values, and the shift amount is guaranteed to be in the
         range 0 .. 31, thereby observing the IR semantics requiring
         all shift values to be in the range 0 .. 2^word_size-1. */

      /* shift_amt = shift_expr & 31, regardless of operation size */
      assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(31)) );

      /* suitably widen the value to be shifted to 32 bits. */
      assign( pre32, op32==Iop_Sar32 ? widenSto32(mkexpr(dst0))
                                     : widenUto32(mkexpr(dst0)) );

      /* res32 = pre32 `shift` shift_amt */
      assign( res32, binop(op32, mkexpr(pre32), mkexpr(shift_amt)) );

      /* res32ss = pre32 `shift` ((shift_amt - 1) & 31) */
      assign( res32ss,
              binop(op32,
                    mkexpr(pre32),
                    binop(Iop_And8,
                          binop(Iop_Sub8,
                                mkexpr(shift_amt), mkU8(1)),
                          mkU8(31))) );

      /* Build the flags thunk. */
      setFlags_DEP1_DEP2_shift(op32, res32, res32ss, ty, shift_amt);

      /* Narrow the result back down. */
      assign( dst1, narrowTo(ty, mkexpr(res32)) );

   } /* if (isShift) */

   else
   if (isRotate) {
      Int    ccOp      = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
      Bool   left      = toBool(gregOfRM(modrm) == 0);
      IRTemp rot_amt   = newTemp(Ity_I8);
      IRTemp rot_amt32 = newTemp(Ity_I8);
      IRTemp oldFlags  = newTemp(Ity_I32);

      /* rot_amt = shift_expr & mask */
      /* By masking the rotate amount thusly, the IR-level Shl/Shr
         expressions never shift beyond the word size and thus remain
         well defined. */
      assign(rot_amt32, binop(Iop_And8, shift_expr, mkU8(31)));

      if (ty == Ity_I32)
         assign(rot_amt, mkexpr(rot_amt32));
      else
         assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt32), mkU8(8*sz-1)));

      if (left) {

         /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
         assign(dst1, 
            binop( mkSizedOp(ty,Iop_Or8),
                   binop( mkSizedOp(ty,Iop_Shl8), 
                          mkexpr(dst0),
                          mkexpr(rot_amt)
                   ),
                   binop( mkSizedOp(ty,Iop_Shr8), 
                          mkexpr(dst0), 
                          binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
                   )
            )
         );
         ccOp += X86G_CC_OP_ROLB;

      } else { /* right */

         /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
         assign(dst1, 
            binop( mkSizedOp(ty,Iop_Or8),
                   binop( mkSizedOp(ty,Iop_Shr8), 
                          mkexpr(dst0),
                          mkexpr(rot_amt)
                   ),
                   binop( mkSizedOp(ty,Iop_Shl8), 
                          mkexpr(dst0), 
                          binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
                   )
            )
         );
         ccOp += X86G_CC_OP_RORB;

      }

      /* dst1 now holds the rotated value.  Build flag thunk.  We
         need the resulting value for this, and the previous flags.
         Except don't set it if the rotate count is zero. */

      assign(oldFlags, mk_x86g_calculate_eflags_all());

      /* rot_amt32 :: Ity_I8.  We need to convert it to I1. */
      IRTemp rot_amt32b = newTemp(Ity_I1);
      assign(rot_amt32b, binop(Iop_CmpNE8, mkexpr(rot_amt32), mkU8(0)) );

      /* CC_DEP1 is the rotated value.  CC_NDEP is flags before. */
      stmt( IRStmt_Put( OFFB_CC_OP,
                        IRExpr_ITE( mkexpr(rot_amt32b),
                                    mkU32(ccOp),
                                    IRExpr_Get(OFFB_CC_OP,Ity_I32) ) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, 
                        IRExpr_ITE( mkexpr(rot_amt32b),
                                    widenUto32(mkexpr(dst1)),
                                    IRExpr_Get(OFFB_CC_DEP1,Ity_I32) ) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, 
                        IRExpr_ITE( mkexpr(rot_amt32b),
                                    mkU32(0),
                                    IRExpr_Get(OFFB_CC_DEP2,Ity_I32) ) ));
      stmt( IRStmt_Put( OFFB_CC_NDEP, 
                        IRExpr_ITE( mkexpr(rot_amt32b),
                                    mkexpr(oldFlags),
                                    IRExpr_Get(OFFB_CC_NDEP,Ity_I32) ) ));
   } /* if (isRotate) */

   /* Save result, and finish up. */
   if (epartIsReg(modrm)) {
      putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
      if (vex_traceflags & VEX_TRACE_FE) {
         vex_printf("%s%c ",
                    nameGrp2(gregOfRM(modrm)), nameISize(sz) );
         if (shift_expr_txt)
            vex_printf("%s", shift_expr_txt);
         else
            ppIRExpr(shift_expr);
         vex_printf(", %s\n", nameIReg(sz,eregOfRM(modrm)));
      }
   } else {
      storeLE(mkexpr(addr), mkexpr(dst1));
      if (vex_traceflags & VEX_TRACE_FE) {
         vex_printf("%s%c ",
                    nameGrp2(gregOfRM(modrm)), nameISize(sz) );
         if (shift_expr_txt)
            vex_printf("%s", shift_expr_txt);
         else
            ppIRExpr(shift_expr);
         vex_printf(", %s\n", dis_buf);
      }
   }
   return delta;
}
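
/* The rotate expansion above uses the identity (for 1 <= n < 8*sz):

      rol(x,n) == (x << n) | (x >>u (8*sz - n))
      ror(x,n) == (x >>u n) | (x << (8*sz - n))

   and the ITEs following it leave the entire flag thunk unchanged when
   the masked rotate amount is zero, as the architecture requires.
   (Explanatory note only.) */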
/* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
static
UInt dis_Grp8_Imm ( UChar sorb,
                    Bool locked,
                    Int delta, UChar modrm,
                    Int am_sz, Int sz, UInt src_val,
                    Bool* decode_OK )
{
   /* src_val denotes a d8.
      And delta on entry points at the modrm byte. */

   IRType ty     = szToITy(sz);
   IRTemp t2     = newTemp(Ity_I32);
   IRTemp t2m    = newTemp(Ity_I32);
   IRTemp t_addr = IRTemp_INVALID;
   HChar  dis_buf[50];
   UInt   mask;

   /* we're optimists :-) */
   *decode_OK = True;

   /* Limit src_val -- the bit offset -- to something within a word.
      The Intel docs say that literal offsets larger than a word are
      masked in this way. */
   switch (sz) {
      case 2:  src_val &= 15; break;
      case 4:  src_val &= 31; break;
      default: *decode_OK = False; return delta;
   }

   /* Invent a mask suitable for the operation. */
   switch (gregOfRM(modrm)) {
      case 4: /* BT */  mask = 0;               break;
      case 5: /* BTS */ mask = 1 << src_val;    break;
      case 6: /* BTR */ mask = ~(1 << src_val); break;
      case 7: /* BTC */ mask = 1 << src_val;    break;
         /* If this needs to be extended, probably simplest to make a
            new function to handle the other cases (0 .. 3).  The
            Intel docs do however not indicate any use for 0 .. 3, so
            we don't expect this to happen. */
      default: *decode_OK = False; return delta;
   }

   /* Fetch the value to be tested and modified into t2, which is
      32-bits wide regardless of sz. */
   if (epartIsReg(modrm)) {
      vassert(am_sz == 1);
      assign( t2, widenUto32(getIReg(sz, eregOfRM(modrm))) );
      delta += (am_sz + 1);
      DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz),
                              src_val, nameIReg(sz,eregOfRM(modrm)));
   } else {
      Int len;
      t_addr = disAMode ( &len, sorb, delta, dis_buf);
      delta  += (len+1);
      assign( t2, widenUto32(loadLE(ty, mkexpr(t_addr))) );
      DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz),
                              src_val, dis_buf);
   }

   /* Compute the new value into t2m, if non-BT. */
   switch (gregOfRM(modrm)) {
      case 4: /* BT */
         break;
      case 5: /* BTS */
         assign( t2m, binop(Iop_Or32, mkU32(mask), mkexpr(t2)) );
         break;
      case 6: /* BTR */
         assign( t2m, binop(Iop_And32, mkU32(mask), mkexpr(t2)) );
         break;
      case 7: /* BTC */
         assign( t2m, binop(Iop_Xor32, mkU32(mask), mkexpr(t2)) );
         break;
      default:
         /*NOTREACHED*/ /*the previous switch guards this*/
         vassert(0);
   }

   /* Write the result back, if non-BT.  If the CAS fails then we
      side-exit from the trace at this point, and so the flag state is
      not affected.  This is of course as required. */
   if (gregOfRM(modrm) != 4 /* BT */) {
      if (epartIsReg(modrm)) {
         putIReg(sz, eregOfRM(modrm), narrowTo(ty, mkexpr(t2m)));
      } else {
         if (locked) {
            casLE( mkexpr(t_addr),
                   narrowTo(ty, mkexpr(t2))/*expd*/,
                   narrowTo(ty, mkexpr(t2m))/*new*/,
                   guest_EIP_curr_instr );
         } else {
            storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
         }
      }
   }

   /* Copy relevant bit from t2 into the carry flag. */
   /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
   stmt( IRStmt_Put( 
            OFFB_CC_DEP1,
            binop(Iop_And32,
                  binop(Iop_Shr32, mkexpr(t2), mkU8(src_val)),
                  mkU32(1))
       ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

   return delta;
}
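
/* Worked example (illustrative): "btsl $3, %eax" gives mask ==
   (1 << 3) == 8, so t2m = t2 | 8, and the carry flag is set from
   (t2 >> 3) & 1 -- the bit's value before it was set. */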
/* Signed/unsigned widening multiply.  Generate IR to multiply the
   value in EAX/AX/AL by the given IRTemp, and park the result in
   EDX:EAX / DX:AX / AX.
*/
static void codegen_mulL_A_D ( Int sz, Bool syned, 
                               IRTemp tmp, const HChar* tmp_txt )
{
   IRType ty = szToITy(sz);
   IRTemp t1 = newTemp(ty);

   assign( t1, getIReg(sz, R_EAX) );

   switch (ty) {
      case Ity_I32: {
         IRTemp res64   = newTemp(Ity_I64);
         IRTemp resHi   = newTemp(Ity_I32);
         IRTemp resLo   = newTemp(Ity_I32);
         IROp   mulOp   = syned ? Iop_MullS32 : Iop_MullU32;
         UInt   tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
         setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
         assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
         assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
         assign( resLo, unop(Iop_64to32,mkexpr(res64)));
         putIReg(4, R_EDX, mkexpr(resHi));
         putIReg(4, R_EAX, mkexpr(resLo));
         break;
      }
      case Ity_I16: {
         IRTemp res32   = newTemp(Ity_I32);
         IRTemp resHi   = newTemp(Ity_I16);
         IRTemp resLo   = newTemp(Ity_I16);
         IROp   mulOp   = syned ? Iop_MullS16 : Iop_MullU16;
         UInt   tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
         setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
         assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
         assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
         assign( resLo, unop(Iop_32to16,mkexpr(res32)));
         putIReg(2, R_EDX, mkexpr(resHi));
         putIReg(2, R_EAX, mkexpr(resLo));
         break;
      }
      case Ity_I8: {
         IRTemp res16   = newTemp(Ity_I16);
         IRTemp resHi   = newTemp(Ity_I8);
         IRTemp resLo   = newTemp(Ity_I8);
         IROp   mulOp   = syned ? Iop_MullS8 : Iop_MullU8;
         UInt   tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
         setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
         assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
         assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
         assign( resLo, unop(Iop_16to8,mkexpr(res16)));
         putIReg(2, R_EAX, mkexpr(res16));
         break;
      }
      default:
         vpanic("codegen_mulL_A_D(x86)");
   }
   DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
}
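
/* Worked example (illustrative): for "mulw %cx" with AX == 0x1234 and
   CX == 0x0100, Iop_MullU16 gives the 32-bit product 0x00123400; DX
   receives the high half 0x0012 and AX the low half 0x3400. */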
/* Group 3 extended opcodes. */
static 
UInt dis_Grp3 ( UChar sorb, Bool locked, Int sz, Int delta, Bool* decode_OK )
{
   UInt    d32;
   UChar   modrm;
   HChar   dis_buf[50];
   Int     len;
   IRTemp  addr;
   IRType  ty = szToITy(sz);
   IRTemp  t1 = newTemp(ty);
   IRTemp dst1, src, dst0;

   *decode_OK = True; /* may change this later */

   modrm = getIByte(delta);

   if (locked && (gregOfRM(modrm) != 2 && gregOfRM(modrm) != 3)) {
      /* LOCK prefix only allowed with not and neg subopcodes */
      *decode_OK = False;
      return delta;
   }

   if (epartIsReg(modrm)) {
      switch (gregOfRM(modrm)) {
         case 0: { /* TEST */
            delta++; d32 = getUDisp(sz, delta); delta += sz;
            dst1 = newTemp(ty);
            assign(dst1, binop(mkSizedOp(ty,Iop_And8),
                               getIReg(sz,eregOfRM(modrm)),
                               mkU(ty,d32)));
            setFlags_DEP1( Iop_And8, dst1, ty );
            DIP("test%c $0x%x, %s\n", nameISize(sz), d32, 
                                      nameIReg(sz, eregOfRM(modrm)));
            break;
         }
         case 1: /* UNDEFINED */
            /* The Intel docs imply this insn is undefined and binutils
               agrees.  Unfortunately Core 2 will run it (with who
               knows what result?)  sandpile.org reckons it's an alias
               for case 0.  We play safe. */
            *decode_OK = False;
            break;
         case 2: /* NOT */
            delta++;
            putIReg(sz, eregOfRM(modrm),
                        unop(mkSizedOp(ty,Iop_Not8),
                             getIReg(sz, eregOfRM(modrm))));
            DIP("not%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
            break;
         case 3: /* NEG */
            delta++;
            dst0 = newTemp(ty);
            src  = newTemp(ty);
            dst1 = newTemp(ty);
            assign(dst0, mkU(ty,0));
            assign(src,  getIReg(sz,eregOfRM(modrm)));
            assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), mkexpr(src)));
            setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
            putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
            DIP("neg%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
            break;
         case 4: /* MUL (unsigned widening) */
            delta++;
            src = newTemp(ty);
            assign(src, getIReg(sz,eregOfRM(modrm)));
            codegen_mulL_A_D ( sz, False, src, nameIReg(sz,eregOfRM(modrm)) );
            break;
         case 5: /* IMUL (signed widening) */
            delta++;
            src = newTemp(ty);
            assign(src, getIReg(sz,eregOfRM(modrm)));
            codegen_mulL_A_D ( sz, True, src, nameIReg(sz,eregOfRM(modrm)) );
            break;
         case 6: /* DIV */
            delta++;
            assign( t1, getIReg(sz, eregOfRM(modrm)) );
            codegen_div ( sz, t1, False );
            DIP("div%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
            break;
         case 7: /* IDIV */
            delta++;
            assign( t1, getIReg(sz, eregOfRM(modrm)) );
            codegen_div ( sz, t1, True );
            DIP("idiv%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
            break;
         default: 
            /* This can't happen - gregOfRM should return 0 .. 7 only */
            vpanic("Grp3(x86)");
      }
   } else {
      addr = disAMode ( &len, sorb, delta, dis_buf );
      delta += len;
      assign(t1, loadLE(ty,mkexpr(addr)));
      switch (gregOfRM(modrm)) {
         case 0: { /* TEST */
            d32 = getUDisp(sz, delta); delta += sz;
            dst1 = newTemp(ty);
            assign(dst1, binop(mkSizedOp(ty,Iop_And8),
                               mkexpr(t1), mkU(ty,d32)));
            setFlags_DEP1( Iop_And8, dst1, ty );
            DIP("test%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
            break;
         }
         case 1: /* UNDEFINED */
            /* See comment above on R case */
            *decode_OK = False;
            break;
         case 2: /* NOT */
            dst1 = newTemp(ty);
            assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
            if (locked) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
                      guest_EIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(dst1) );
            }
            DIP("not%c %s\n", nameISize(sz), dis_buf);
            break;
         case 3: /* NEG */
            dst0 = newTemp(ty);
            src  = newTemp(ty);
            dst1 = newTemp(ty);
            assign(dst0, mkU(ty,0));
            assign(src,  mkexpr(t1));
            assign(dst1, binop(mkSizedOp(ty,Iop_Sub8),
                               mkexpr(dst0), mkexpr(src)));
            if (locked) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
                      guest_EIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(dst1) );
            }
            setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
            DIP("neg%c %s\n", nameISize(sz), dis_buf);
            break;
         case 4: /* MUL */
            codegen_mulL_A_D ( sz, False, t1, dis_buf );
            break;
         case 5: /* IMUL */
            codegen_mulL_A_D ( sz, True, t1, dis_buf );
            break;
         case 6: /* DIV */
            codegen_div ( sz, t1, False );
            DIP("div%c %s\n", nameISize(sz), dis_buf);
            break;
         case 7: /* IDIV */
            codegen_div ( sz, t1, True );
            DIP("idiv%c %s\n", nameISize(sz), dis_buf);
            break;
         default: 
            /* This can't happen - gregOfRM should return 0 .. 7 only */
            vpanic("Grp3(x86)");
      }
   }
   return delta;
}
/* Group 4 extended opcodes. */
static
UInt dis_Grp4 ( UChar sorb, Bool locked, Int delta, Bool* decode_OK )
{
   Int    alen;
   UChar  modrm;
   HChar  dis_buf[50];
   IRType ty = Ity_I8;
   IRTemp t1 = newTemp(ty);
   IRTemp t2 = newTemp(ty);

   *decode_OK = True;

   modrm = getIByte(delta);

   if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
      /* LOCK prefix only allowed with inc and dec subopcodes */
      *decode_OK = False;
      return delta;
   }

   if (epartIsReg(modrm)) {
      assign(t1, getIReg(1, eregOfRM(modrm)));
      switch (gregOfRM(modrm)) {
         case 0: /* INC */
            assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
            putIReg(1, eregOfRM(modrm), mkexpr(t2));
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
            putIReg(1, eregOfRM(modrm), mkexpr(t2));
            setFlags_INC_DEC( False, t2, ty );
            break;
         default: 
            *decode_OK = False;
            return delta;
      }
      delta++;
      DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)),
                      nameIReg(1, eregOfRM(modrm)));
   } else {
      IRTemp addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( t1, loadLE(ty, mkexpr(addr)) );
      switch (gregOfRM(modrm)) {
         case 0: /* INC */
            assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
            if (locked) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 
                      guest_EIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(t2) );
            }
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
            if (locked) {
               casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/, 
                      guest_EIP_curr_instr );
            } else {
               storeLE( mkexpr(addr), mkexpr(t2) );
            }
            setFlags_INC_DEC( False, t2, ty );
            break;
         default: 
            *decode_OK = False;
            return delta;
      }
      delta += alen;
      DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)), dis_buf);
   }
   return delta;
}
/* Group 5 extended opcodes. */
static
UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta, 
                /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
{
   Int     len;
   UChar   modrm;
   HChar   dis_buf[50];
   IRTemp  addr = IRTemp_INVALID;
   IRType  ty   = szToITy(sz);
   IRTemp  t1   = newTemp(ty);
   IRTemp  t2   = IRTemp_INVALID;

   *decode_OK = True;

   modrm = getIByte(delta);

   if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
      /* LOCK prefix only allowed with inc and dec subopcodes */
      *decode_OK = False;
      return delta;
   }

   if (epartIsReg(modrm)) {
      assign(t1, getIReg(sz,eregOfRM(modrm)));
      switch (gregOfRM(modrm)) {
         case 0: /* INC */
            vassert(sz == 2 || sz == 4);
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Add8),
                             mkexpr(t1), mkU(ty,1)));
            setFlags_INC_DEC( True, t2, ty );
            putIReg(sz,eregOfRM(modrm),mkexpr(t2));
            break;
         case 1: /* DEC */
            vassert(sz == 2 || sz == 4);
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
                             mkexpr(t1), mkU(ty,1)));
            setFlags_INC_DEC( False, t2, ty );
            putIReg(sz,eregOfRM(modrm),mkexpr(t2));
            break;
         case 2: /* call Ev */
            vassert(sz == 4);
            t2 = newTemp(Ity_I32);
            assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
            putIReg(4, R_ESP, mkexpr(t2));
            storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+1));
            jmp_treg(dres, Ijk_Call, t1);
            vassert(dres->whatNext == Dis_StopHere);
            break;
         case 4: /* jmp Ev */
            vassert(sz == 4);
            jmp_treg(dres, Ijk_Boring, t1);
            vassert(dres->whatNext == Dis_StopHere);
            break;
         case 6: /* PUSH Ev */
            vassert(sz == 4 || sz == 2);
            t2 = newTemp(Ity_I32);
            assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
            putIReg(4, R_ESP, mkexpr(t2) );
            storeLE( mkexpr(t2), mkexpr(t1) );
            break;
         default: 
            *decode_OK = False;
            return delta;
      }
      delta++;
      DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
                       nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
   } else {
      addr = disAMode ( &len, sorb, delta, dis_buf );
      assign(t1, loadLE(ty,mkexpr(addr)));
      switch (gregOfRM(modrm)) {
         case 0: /* INC */
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Add8),
                             mkexpr(t1), mkU(ty,1)));
            if (locked) {
               casLE( mkexpr(addr),
                      mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
            } else {
               storeLE(mkexpr(addr),mkexpr(t2));
            }
            setFlags_INC_DEC( True, t2, ty );
            break;
         case 1: /* DEC */
            t2 = newTemp(ty);
            assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
                             mkexpr(t1), mkU(ty,1)));
            if (locked) {
               casLE( mkexpr(addr),
                      mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
            } else {
               storeLE(mkexpr(addr),mkexpr(t2));
            }
            setFlags_INC_DEC( False, t2, ty );
            break;
         case 2: /* call Ev */
            vassert(sz == 4);
            t2 = newTemp(Ity_I32);
            assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
            putIReg(4, R_ESP, mkexpr(t2));
            storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+len));
            jmp_treg(dres, Ijk_Call, t1);
            vassert(dres->whatNext == Dis_StopHere);
            break;
         case 4: /* JMP Ev */
            vassert(sz == 4);
            jmp_treg(dres, Ijk_Boring, t1);
            vassert(dres->whatNext == Dis_StopHere);
            break;
         case 6: /* PUSH Ev */
            vassert(sz == 4 || sz == 2);
            t2 = newTemp(Ity_I32);
            assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
            putIReg(4, R_ESP, mkexpr(t2) );
            storeLE( mkexpr(t2), mkexpr(t1) );
            break;
         default: 
            *decode_OK = False;
            return delta;
      }
      delta += len;
      DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
                       nameISize(sz), dis_buf);
   }
   return delta;
}
/*------------------------------------------------------------*/
/*--- Disassembling string ops (including REP prefixes)    ---*/
/*------------------------------------------------------------*/

/* Code shared by all the string ops */
static
void dis_string_op_increment(Int sz, IRTemp t_inc)
{
   if (sz == 4 || sz == 2) {
      assign( t_inc, 
              binop(Iop_Shl32, IRExpr_Get( OFFB_DFLAG, Ity_I32 ),
                               mkU8(sz/2) ) );
   } else {
      assign( t_inc, 
              IRExpr_Get( OFFB_DFLAG, Ity_I32 ) );
   }
}
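
/* The guest DFLAG is stored as +1 or -1.  Shifting it left by sz/2
   (that is, by 2 for sz==4 and by 1 for sz==2) turns that into +/-4
   or +/-2, the per-iteration pointer increment; for sz==1 it is used
   as-is.  (Explanatory note; the arithmetic is as in the code above.) */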
static
void dis_string_op( void (*dis_OP)( Int, IRTemp ), 
                    Int sz, const HChar* name, UChar sorb )
{
   IRTemp t_inc = newTemp(Ity_I32);
   vassert(sorb == 0); /* hmm.  so what was the point of passing it in? */
   dis_string_op_increment(sz, t_inc);
   dis_OP( sz, t_inc );
   DIP("%s%c\n", name, nameISize(sz));
}

static 
void dis_MOVS ( Int sz, IRTemp t_inc )
{
   IRType ty = szToITy(sz);
   IRTemp td = newTemp(Ity_I32);   /* EDI */
   IRTemp ts = newTemp(Ity_I32);   /* ESI */

   assign( td, getIReg(4, R_EDI) );
   assign( ts, getIReg(4, R_ESI) );

   storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );

   putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
   putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
}

static 
void dis_LODS ( Int sz, IRTemp t_inc )
{
   IRType ty = szToITy(sz);
   IRTemp ts = newTemp(Ity_I32);   /* ESI */

   assign( ts, getIReg(4, R_ESI) );

   putIReg( sz, R_EAX, loadLE(ty, mkexpr(ts)) );

   putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
}

static 
void dis_STOS ( Int sz, IRTemp t_inc )
{
   IRType ty = szToITy(sz);
   IRTemp ta = newTemp(ty);        /* EAX */
   IRTemp td = newTemp(Ity_I32);   /* EDI */

   assign( ta, getIReg(sz, R_EAX) );
   assign( td, getIReg(4, R_EDI) );

   storeLE( mkexpr(td), mkexpr(ta) );

   putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
}

static 
void dis_CMPS ( Int sz, IRTemp t_inc )
{
   IRType ty  = szToITy(sz);
   IRTemp tdv = newTemp(ty);      /* (EDI) */
   IRTemp tsv = newTemp(ty);      /* (ESI) */
   IRTemp td  = newTemp(Ity_I32); /*  EDI  */
   IRTemp ts  = newTemp(Ity_I32); /*  ESI  */

   assign( td, getIReg(4, R_EDI) );
   assign( ts, getIReg(4, R_ESI) );

   assign( tdv, loadLE(ty,mkexpr(td)) );
   assign( tsv, loadLE(ty,mkexpr(ts)) );

   setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );

   putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
   putIReg(4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
}

static 
void dis_SCAS ( Int sz, IRTemp t_inc )
{
   IRType ty  = szToITy(sz);
   IRTemp ta  = newTemp(ty);       /*  EAX  */
   IRTemp td  = newTemp(Ity_I32);  /*  EDI  */
   IRTemp tdv = newTemp(ty);       /* (EDI) */

   assign( ta, getIReg(sz, R_EAX) );
   assign( td, getIReg(4, R_EDI) );

   assign( tdv, loadLE(ty,mkexpr(td)) );
   setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );

   putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
}

/* Wrap the appropriate string op inside a REP/REPE/REPNE.
   We assume the insn is the last one in the basic block, and so emit a jump
   to the next insn, rather than just falling through. */
static 
void dis_REP_op ( /*MOD*/DisResult* dres,
                  X86Condcode cond,
                  void (*dis_OP)(Int, IRTemp),
                  Int sz, Addr32 eip, Addr32 eip_next, const HChar* name )
{
   IRTemp t_inc = newTemp(Ity_I32);
   IRTemp tc    = newTemp(Ity_I32);  /*  ECX  */

   assign( tc, getIReg(4,R_ECX) );

   stmt( IRStmt_Exit( binop(Iop_CmpEQ32,mkexpr(tc),mkU32(0)),
                      Ijk_Boring,
                      IRConst_U32(eip_next), OFFB_EIP ) );

   putIReg(4, R_ECX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );

   dis_string_op_increment(sz, t_inc);
   dis_OP (sz, t_inc);

   if (cond == X86CondAlways) {
      jmp_lit(dres, Ijk_Boring, eip);
      vassert(dres->whatNext == Dis_StopHere);
   } else {
      stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond),
                         Ijk_Boring,
                         IRConst_U32(eip), OFFB_EIP ) );
      jmp_lit(dres, Ijk_Boring, eip_next);
      vassert(dres->whatNext == Dis_StopHere);
   }
   DIP("%s%c\n", name, nameISize(sz));
}
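
/* Decomposition sketch (illustrative): translating "rep movsl" once
   yields, roughly,

      if (ECX == 0) goto next_insn;
      ECX = ECX - 1;
      do one MOVSL step;
      goto this_insn;      -- for REPE/REPNE, instead loop back only
                           -- while the condition holds, else fall
                           -- through to next_insn

   so the loop is re-entered once per element rather than unrolled. */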
/*------------------------------------------------------------*/
/*--- Arithmetic, etc.                                      ---*/
/*------------------------------------------------------------*/

/* IMUL E, G.  Supplied eip points to the modR/M byte. */
static
UInt dis_mul_E_G ( UChar       sorb,
                   Int         size, 
                   Int         delta0 )
{
   Int    alen;
   HChar  dis_buf[50];
   UChar  rm = getIByte(delta0);
   IRType ty = szToITy(size);
   IRTemp te = newTemp(ty);
   IRTemp tg = newTemp(ty);
   IRTemp resLo = newTemp(ty);

   assign( tg, getIReg(size, gregOfRM(rm)) );
   if (epartIsReg(rm)) {
      assign( te, getIReg(size, eregOfRM(rm)) );
   } else {
      IRTemp addr = disAMode( &alen, sorb, delta0, dis_buf );
      assign( te, loadLE(ty,mkexpr(addr)) );
   }

   setFlags_MUL ( ty, te, tg, X86G_CC_OP_SMULB );

   assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );

   putIReg(size, gregOfRM(rm), mkexpr(resLo) );

   if (epartIsReg(rm)) {
      DIP("imul%c %s, %s\n", nameISize(size), 
                             nameIReg(size,eregOfRM(rm)),
                             nameIReg(size,gregOfRM(rm)));
      return 1+delta0;
   } else {
      DIP("imul%c %s, %s\n", nameISize(size), 
                             dis_buf, nameIReg(size,gregOfRM(rm)));
      return alen+delta0;
   }
}

/* IMUL I * E -> G.  Supplied eip points to the modR/M byte. */
static
UInt dis_imul_I_E_G ( UChar       sorb,
                      Int         size, 
                      Int         delta,
                      Int         litsize )
{
   Int    d32, alen;
   HChar  dis_buf[50];
   UChar  rm = getIByte(delta);
   IRType ty = szToITy(size);
   IRTemp te = newTemp(ty);
   IRTemp tl = newTemp(ty);
   IRTemp resLo = newTemp(ty);

   vassert(size == 1 || size == 2 || size == 4);

   if (epartIsReg(rm)) {
      assign(te, getIReg(size, eregOfRM(rm)));
      delta++;
   } else {
      IRTemp addr = disAMode( &alen, sorb, delta, dis_buf );
      assign(te, loadLE(ty, mkexpr(addr)));
      delta += alen;
   }
   d32 = getSDisp(litsize,delta);
   delta += litsize;

   if (size == 1) d32 &= 0xFF;
   if (size == 2) d32 &= 0xFFFF;

   assign(tl, mkU(ty,d32));

   assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));

   setFlags_MUL ( ty, te, tl, X86G_CC_OP_SMULB );

   putIReg(size, gregOfRM(rm), mkexpr(resLo));

   DIP("imul %d, %s, %s\n", d32, 
       ( epartIsReg(rm) ? nameIReg(size,eregOfRM(rm)) : dis_buf ),
       nameIReg(size,gregOfRM(rm)) );
   return delta;
}
/* Generate an IR sequence to do a count-leading-zeroes operation on
   the supplied IRTemp, and return a new IRTemp holding the result.
   'ty' may be Ity_I16 or Ity_I32 only.  In the case where the
   argument is zero, return the number of bits in the word (the
   natural semantics). */
static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
{
   vassert(ty == Ity_I32 || ty == Ity_I16);

   IRTemp src32 = newTemp(Ity_I32);
   assign(src32, widenUto32( mkexpr(src) ));

   IRTemp src32x = newTemp(Ity_I32);
   assign(src32x,
          binop(Iop_Shl32, mkexpr(src32),
                           mkU8(32 - 8 * sizeofIRType(ty))));

   // Clz32 has undefined semantics when its input is zero, so
   // special-case around that.
   IRTemp res32 = newTemp(Ity_I32);
   assign(res32,
          IRExpr_ITE(
             binop(Iop_CmpEQ32, mkexpr(src32x), mkU32(0)),
             mkU32(8 * sizeofIRType(ty)),
             unop(Iop_Clz32, mkexpr(src32x))
   ));

   IRTemp res = newTemp(ty);
   assign(res, narrowTo(ty, mkexpr(res32)));
   return res;
}
/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- x87 FLOATING POINT INSTRUCTIONS                      ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* --- Helper functions for dealing with the register stack. --- */

/* --- Set the emulation-warning pseudo-register. --- */

static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_EMNOTE, e ) );
}

/* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */

static IRExpr* mkQNaN64 ( void )
{
  /* QNaN is 0 2047 1 0(51times) 
     == 0b 11111111111b 1 0(51times)
     == 0x7FF8 0000 0000 0000
   */
   return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
}

/* --------- Get/put the top-of-stack pointer. --------- */

static IRExpr* get_ftop ( void )
{
   return IRExpr_Get( OFFB_FTOP, Ity_I32 );
}

static void put_ftop ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_FTOP, e ) );
}

/* --------- Get/put the C3210 bits. --------- */

static IRExpr* get_C3210 ( void )
{
   return IRExpr_Get( OFFB_FC3210, Ity_I32 );
}

static void put_C3210 ( IRExpr* e )
{
   stmt( IRStmt_Put( OFFB_FC3210, e ) );
}

/* --------- Get/put the FPU rounding mode. --------- */
static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
{
   return IRExpr_Get( OFFB_FPROUND, Ity_I32 );
}

static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
{
   stmt( IRStmt_Put( OFFB_FPROUND, e ) );
}

/* --------- Synthesise a 2-bit FPU rounding mode. --------- */
/* Produces a value in 0 .. 3, which is encoded as per the type
   IRRoundingMode.  Since the guest_FPROUND value is also encoded as
   per IRRoundingMode, we merely need to get it and mask it for
   safety.
*/
static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
{
   return binop( Iop_And32, get_fpround(), mkU32(3) );
}

static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
{
   return mkU32(Irrm_NEAREST);
}
/* --------- Get/set FP register tag bytes. --------- */

/* Given i, and some expression e, generate 'ST_TAG(i) = e'. */

static void put_ST_TAG ( Int i, IRExpr* value )
{
   IRRegArray* descr;
   vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
   descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
}

/* Given i, generate an expression yielding 'ST_TAG(i)'.  This will be
   zero to indicate "Empty" and nonzero to indicate "NonEmpty".  */

static IRExpr* get_ST_TAG ( Int i )
{
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   return IRExpr_GetI( descr, get_ftop(), i );
}

/* --------- Get/set FP registers. --------- */

/* Given i, and some expression e, emit 'ST(i) = e' and set the
   register's tag to indicate the register is full.  The previous
   state of the register is not checked. */

static void put_ST_UNCHECKED ( Int i, IRExpr* value )
{
   IRRegArray* descr;
   vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
   descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
   stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
   /* Mark the register as in-use. */
   put_ST_TAG(i, mkU8(1));
}

/* Given i, and some expression e, emit
      ST(i) = is_full(i) ? NaN : e
   and set the tag accordingly.
*/

static void put_ST ( Int i, IRExpr* value )
{
   put_ST_UNCHECKED(
      i,
      IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
                  /* non-0 means full */
                  mkQNaN64(),
                  /* 0 means empty */
                  value
      )
   );
}

/* Given i, generate an expression yielding 'ST(i)'. */

static IRExpr* get_ST_UNCHECKED ( Int i )
{
   IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
   return IRExpr_GetI( descr, get_ftop(), i );
}

/* Given i, generate an expression yielding 
      is_full(i) ? ST(i) : NaN
*/

static IRExpr* get_ST ( Int i )
{
   return
      IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
                  /* non-0 means full */
                  get_ST_UNCHECKED(i),
                  /* 0 means empty */
                  mkQNaN64() );
}

/* Given i, and some expression e, and a condition cond, generate IR
   which has the same effect as put_ST(i,e) when cond is true and has
   no effect when cond is false.  Given the lack of proper
   if-then-else in the IR, this is pretty tricky.
*/

static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value )
{
   // new_tag = if cond then FULL else old_tag
   // new_val = if cond then (if old_tag==FULL then NaN else val)
   //                   else old_val

   IRTemp old_tag = newTemp(Ity_I8);
   assign(old_tag, get_ST_TAG(i));
   IRTemp new_tag = newTemp(Ity_I8);
   assign(new_tag,
          IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag)));

   IRTemp old_val = newTemp(Ity_F64);
   assign(old_val, get_ST_UNCHECKED(i));
   IRTemp new_val = newTemp(Ity_F64);
   assign(new_val,
          IRExpr_ITE(mkexpr(cond),
                     IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)),
                                /* non-0 means full */
                                mkQNaN64(),
                                /* 0 means empty */
                                value),
                     mkexpr(old_val)));

   put_ST_UNCHECKED(i, mkexpr(new_val));
   // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL.  So 
   // now set it to new_tag instead.
   put_ST_TAG(i, mkexpr(new_tag));
}
/* Adjust FTOP downwards by one register. */

static void fp_push ( void )
{
   put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
}

/* Adjust FTOP downwards by one register when COND is 1:I1.  Else
   don't change it. */

static void maybe_fp_push ( IRTemp cond )
{
   put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) );
}

/* Adjust FTOP upwards by one register, and mark the vacated register
   as empty.  */

static void fp_pop ( void )
{
   put_ST_TAG(0, mkU8(0));
   put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
}

/* Set the C2 bit of the FPU status register to e[0].  Assumes that
   e[31:1] == 0. 
*/
static void set_C2 ( IRExpr* e )
{
   IRExpr* cleared = binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2));
   put_C3210( binop(Iop_Or32,
                    cleared,
                    binop(Iop_Shl32, e, mkU8(X86G_FC_SHIFT_C2))) );
}

/* Generate code to check that abs(d64) < 2^63 and is finite.  This is
   used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN.  The
   test is simple, but the derivation of it is not so simple.

   The exponent field for an IEEE754 double is 11 bits.  That means it
   can take values 0 through 0x7FF.  If the exponent has value 0x7FF,
   the number is either a NaN or an Infinity and so is not finite.
   Furthermore, a finite value of exactly 2^63 is the smallest value
   that has exponent value 0x43E.  Hence, what we need to do is
   extract the exponent, ignoring the sign bit and mantissa, and check
   it is < 0x43E, or <= 0x43D.

   To make this easily applicable to 32- and 64-bit targets, a
   roundabout approach is used.  First the number is converted to I64,
   then the top 32 bits are taken.  Shifting them right by 20 bits
   places the sign bit and exponent in the bottom 12 bits.  Anding
   with 0x7FF gets rid of the sign bit, leaving just the exponent
   available for comparison.
*/
static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 )
{
   IRTemp i64 = newTemp(Ity_I64);
   assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) );
   IRTemp exponent = newTemp(Ity_I32);
   assign(exponent,
          binop(Iop_And32,
                binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)),
                mkU32(0x7FF)));
   IRTemp in_range_and_finite = newTemp(Ity_I1);
   assign(in_range_and_finite,
          binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D)));
   return in_range_and_finite;
}
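
/* Arithmetic check (illustrative): 2^63 has biased exponent
   63 + 1023 = 1086 = 0x43E, so requiring exponent <= 0x43D admits
   exactly the finite doubles with magnitude < 2^63 and rejects
   Inf/NaN (exponent 0x7FF) as well. */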
/* Invent a plausible-looking FPU status word value:
      ((ftop & 7) << 11) | (c3210 & 0x4700)
 */
static IRExpr* get_FPU_sw ( void )
{
   return
      binop(Iop_Or32,
            binop(Iop_Shl32, 
                  binop(Iop_And32, get_ftop(), mkU32(7)), 
                  mkU8(11)),
            binop(Iop_And32, get_C3210(), mkU32(0x4700))
      );
}
/* ------------------------------------------------------- */
/* Given all that stack-mangling junk, we can now go ahead
   and describe FP instructions. 
*/

/* ST(0) = ST(0) `op` mem64/32(addr)
   Need to check ST(0)'s tag on read, but not on write.
*/
static
void fp_do_op_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf, 
                         IROp op, Bool dbl )
{
   DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
   if (dbl) {
      put_ST_UNCHECKED(0, 
         triop( op, 
                get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                get_ST(0), 
                loadLE(Ity_F64,mkexpr(addr))
         ));
   } else {
      put_ST_UNCHECKED(0, 
         triop( op, 
                get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                get_ST(0), 
                unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
         ));
   }
}

/* ST(0) = mem64/32(addr) `op` ST(0)
   Need to check ST(0)'s tag on read, but not on write.
*/
static
void fp_do_oprev_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf, 
                            IROp op, Bool dbl )
{
   DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
   if (dbl) {
      put_ST_UNCHECKED(0, 
         triop( op, 
                get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                loadLE(Ity_F64,mkexpr(addr)),
                get_ST(0)
         ));
   } else {
      put_ST_UNCHECKED(0, 
         triop( op, 
                get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
                get_ST(0)
         ));
   }
}

/* ST(dst) = ST(dst) `op` ST(src).
   Check dst and src tags when reading but not on write.
*/
static
void fp_do_op_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
                      Bool pop_after )
{
   DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"",
                                 st_src, st_dst );
   put_ST_UNCHECKED( 
      st_dst, 
      triop( op, 
             get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
             get_ST(st_dst), 
             get_ST(st_src) ) 
   );
   if (pop_after)
      fp_pop();
}

/* ST(dst) = ST(src) `op` ST(dst).
   Check dst and src tags when reading but not on write.
*/
static
void fp_do_oprev_ST_ST ( const HChar* op_txt, IROp op, UInt st_src,
                         UInt st_dst, Bool pop_after )
{
   DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"",
                                 st_src, st_dst );
   put_ST_UNCHECKED( 
      st_dst, 
      triop( op, 
             get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
             get_ST(st_src), 
             get_ST(st_dst) ) 
   );
   if (pop_after)
      fp_pop();
}

/* %eflags(Z,P,C) = UCOMI( st(0), st(i) ) */
static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
{
   DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
   /* This is a bit of a hack (and isn't really right).  It sets
      Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
      documentation implies A and S are unchanged.  
   */
   /* It's also fishy in that it is used both for COMIP and
      UCOMIP, and they aren't the same (although similar). */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     binop( Iop_And32,
                            binop(Iop_CmpF64, get_ST(0), get_ST(i)),
                            mkU32(0x45)
       )));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
   if (pop_after)
      fp_pop();
}
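
/* Note (illustrative): Iop_CmpF64 yields the IRCmpF64Result encoding
   (UN=0x45, LT=0x01, GT=0x00, EQ=0x40), so masking with 0x45 drops the
   comparison result directly onto the eflags C, P and Z bit positions,
   which is what UCOMI needs. */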
3843 UInt
dis_FPU ( Bool
* decode_ok
, UChar sorb
, Int delta
)
3850 /* On entry, delta points at the second byte of the insn (the modrm
3852 UChar first_opcode
= getIByte(delta
-1);
3853 UChar modrm
= getIByte(delta
+0);
3855 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
3857 if (first_opcode
== 0xD8) {
3860 /* bits 5,4,3 are an opcode extension, and the modRM also
3861 specifies an address. */
3862 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
3865 switch (gregOfRM(modrm
)) {
3867 case 0: /* FADD single-real */
3868 fp_do_op_mem_ST_0 ( addr
, "add", dis_buf
, Iop_AddF64
, False
);
3871 case 1: /* FMUL single-real */
3872 fp_do_op_mem_ST_0 ( addr
, "mul", dis_buf
, Iop_MulF64
, False
);
3875 case 2: /* FCOM single-real */
3876 DIP("fcoms %s\n", dis_buf
);
3877 /* This forces C1 to zero, which isn't right. */
3884 loadLE(Ity_F32
,mkexpr(addr
)))),
3890 case 3: /* FCOMP single-real */
3891 DIP("fcomps %s\n", dis_buf
);
3892 /* This forces C1 to zero, which isn't right. */
3899 loadLE(Ity_F32
,mkexpr(addr
)))),
3906 case 4: /* FSUB single-real */
3907 fp_do_op_mem_ST_0 ( addr
, "sub", dis_buf
, Iop_SubF64
, False
);
3910 case 5: /* FSUBR single-real */
3911 fp_do_oprev_mem_ST_0 ( addr
, "subr", dis_buf
, Iop_SubF64
, False
);
3914 case 6: /* FDIV single-real */
3915 fp_do_op_mem_ST_0 ( addr
, "div", dis_buf
, Iop_DivF64
, False
);
3918 case 7: /* FDIVR single-real */
3919 fp_do_oprev_mem_ST_0 ( addr
, "divr", dis_buf
, Iop_DivF64
, False
);
3923 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt
)gregOfRM(modrm
));
3924 vex_printf("first_opcode == 0xD8\n");
3931 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
3932 fp_do_op_ST_ST ( "add", Iop_AddF64
, modrm
- 0xC0, 0, False
);
3935 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
3936 fp_do_op_ST_ST ( "mul", Iop_MulF64
, modrm
- 0xC8, 0, False
);
3939 /* Dunno if this is right */
3940 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
3941 r_dst
= (UInt
)modrm
- 0xD0;
3942 DIP("fcom %%st(0),%%st(%u)\n", r_dst
);
3943 /* This forces C1 to zero, which isn't right. */
3947 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
3953 /* Dunno if this is right */
3954 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
3955 r_dst
= (UInt
)modrm
- 0xD8;
3956 DIP("fcomp %%st(0),%%st(%u)\n", r_dst
);
3957 /* This forces C1 to zero, which isn't right. */
3961 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
3968 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
3969 fp_do_op_ST_ST ( "sub", Iop_SubF64
, modrm
- 0xE0, 0, False
);
3972 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
3973 fp_do_oprev_ST_ST ( "subr", Iop_SubF64
, modrm
- 0xE8, 0, False
);
3976 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
3977 fp_do_op_ST_ST ( "div", Iop_DivF64
, modrm
- 0xF0, 0, False
);
3980 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
3981 fp_do_oprev_ST_ST ( "divr", Iop_DivF64
, modrm
- 0xF8, 0, False
);
3990 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
3992 if (first_opcode
== 0xD9) {
3995 /* bits 5,4,3 are an opcode extension, and the modRM also
3996 specifies an address. */
3997 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
4000 switch (gregOfRM(modrm
)) {
4002 case 0: /* FLD single-real */
4003 DIP("flds %s\n", dis_buf
);
4005 put_ST(0, unop(Iop_F32toF64
,
4006 loadLE(Ity_F32
, mkexpr(addr
))));
4009 case 2: /* FST single-real */
4010 DIP("fsts %s\n", dis_buf
);
4011 storeLE(mkexpr(addr
),
4012 binop(Iop_F64toF32
, get_roundingmode(), get_ST(0)));
4015 case 3: /* FSTP single-real */
4016 DIP("fstps %s\n", dis_buf
);
4017 storeLE(mkexpr(addr
),
4018 binop(Iop_F64toF32
, get_roundingmode(), get_ST(0)));
4022 case 4: { /* FLDENV m28 */
4023 /* Uses dirty helper:
4024 VexEmNote x86g_do_FLDENV ( VexGuestX86State*, HWord ) */
4025 IRTemp ew
= newTemp(Ity_I32
);
4026 IRDirty
* d
= unsafeIRDirty_0_N (
4028 "x86g_dirtyhelper_FLDENV",
4029 &x86g_dirtyhelper_FLDENV
,
4030 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
4033 /* declare we're reading memory */
4035 d
->mAddr
= mkexpr(addr
);
4038 /* declare we're writing guest state */
4040 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
4042 d
->fxState
[0].fx
= Ifx_Write
;
4043 d
->fxState
[0].offset
= OFFB_FTOP
;
4044 d
->fxState
[0].size
= sizeof(UInt
);
4046 d
->fxState
[1].fx
= Ifx_Write
;
4047 d
->fxState
[1].offset
= OFFB_FPTAGS
;
4048 d
->fxState
[1].size
= 8 * sizeof(UChar
);
4050 d
->fxState
[2].fx
= Ifx_Write
;
4051 d
->fxState
[2].offset
= OFFB_FPROUND
;
4052 d
->fxState
[2].size
= sizeof(UInt
);
4054 d
->fxState
[3].fx
= Ifx_Write
;
4055 d
->fxState
[3].offset
= OFFB_FC3210
;
4056 d
->fxState
[3].size
= sizeof(UInt
);
4058 stmt( IRStmt_Dirty(d
) );
4060 /* ew contains any emulation warning we may need to
4061 issue. If needed, side-exit to the next insn,
4062 reporting the warning, so that Valgrind's dispatcher
4063 sees the warning. */
4064 put_emwarn( mkexpr(ew
) );
4067 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
4069 IRConst_U32( ((Addr32
)guest_EIP_bbstart
)+delta
),
4074 DIP("fldenv %s\n", dis_buf
);
4078 case 5: {/* FLDCW */
4079 /* The only thing we observe in the control word is the
4080 rounding mode. Therefore, pass the 16-bit value
4081 (x87 native-format control word) to a clean helper,
4082 getting back a 64-bit value, the lower half of which
4083 is the FPROUND value to store, and the upper half of
4084 which is the emulation-warning token which may be
4087 /* ULong x86h_check_fldcw ( UInt ); */
4088 IRTemp t64
= newTemp(Ity_I64
);
4089 IRTemp ew
= newTemp(Ity_I32
);
4090 DIP("fldcw %s\n", dis_buf
);
4091 assign( t64
, mkIRExprCCall(
4092 Ity_I64
, 0/*regparms*/,
4097 loadLE(Ity_I16
, mkexpr(addr
)))
4102 put_fpround( unop(Iop_64to32
, mkexpr(t64
)) );
4103 assign( ew
, unop(Iop_64HIto32
, mkexpr(t64
) ) );
4104 put_emwarn( mkexpr(ew
) );
4105 /* Finally, if an emulation warning was reported,
4106 side-exit to the next insn, reporting the warning,
4107 so that Valgrind's dispatcher sees the warning. */
4110 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
4112 IRConst_U32( ((Addr32
)guest_EIP_bbstart
)+delta
),
4119 case 6: { /* FNSTENV m28 */
4120 /* Uses dirty helper:
4121 void x86g_do_FSTENV ( VexGuestX86State*, HWord ) */
4122 IRDirty
* d
= unsafeIRDirty_0_N (
4124 "x86g_dirtyhelper_FSTENV",
4125 &x86g_dirtyhelper_FSTENV
,
4126 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
4128 /* declare we're writing memory */
4130 d
->mAddr
= mkexpr(addr
);
4133 /* declare we're reading guest state */
4135 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
4137 d
->fxState
[0].fx
= Ifx_Read
;
4138 d
->fxState
[0].offset
= OFFB_FTOP
;
4139 d
->fxState
[0].size
= sizeof(UInt
);
4141 d
->fxState
[1].fx
= Ifx_Read
;
4142 d
->fxState
[1].offset
= OFFB_FPTAGS
;
4143 d
->fxState
[1].size
= 8 * sizeof(UChar
);
4145 d
->fxState
[2].fx
= Ifx_Read
;
4146 d
->fxState
[2].offset
= OFFB_FPROUND
;
4147 d
->fxState
[2].size
= sizeof(UInt
);
4149 d
->fxState
[3].fx
= Ifx_Read
;
4150 d
->fxState
[3].offset
= OFFB_FC3210
;
4151 d
->fxState
[3].size
= sizeof(UInt
);
4153 stmt( IRStmt_Dirty(d
) );
4155 DIP("fnstenv %s\n", dis_buf
);
4159 case 7: /* FNSTCW */
4160 /* Fake up a native x87 FPU control word. The only
4161 thing it depends on is FPROUND[1:0], so call a clean
4162 helper to cook it up. */
4163 /* UInt x86h_create_fpucw ( UInt fpround ) */
4164 DIP("fnstcw %s\n", dis_buf
);
4170 "x86g_create_fpucw", &x86g_create_fpucw
,
4171 mkIRExprVec_1( get_fpround() )
4178 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt
)gregOfRM(modrm
));
4179 vex_printf("first_opcode == 0xD9\n");
4187 case 0xC0 ... 0xC7: /* FLD %st(?) */
4188 r_src
= (UInt
)modrm
- 0xC0;
4189 DIP("fld %%st(%u)\n", r_src
);
4190 t1
= newTemp(Ity_F64
);
4191 assign(t1
, get_ST(r_src
));
4193 put_ST(0, mkexpr(t1
));
4196 case 0xC8 ... 0xCF: /* FXCH %st(?) */
4197 r_src
= (UInt
)modrm
- 0xC8;
4198 DIP("fxch %%st(%u)\n", r_src
);
4199 t1
= newTemp(Ity_F64
);
4200 t2
= newTemp(Ity_F64
);
4201 assign(t1
, get_ST(0));
4202 assign(t2
, get_ST(r_src
));
4203 put_ST_UNCHECKED(0, mkexpr(t2
));
4204 put_ST_UNCHECKED(r_src
, mkexpr(t1
));
4207 case 0xE0: /* FCHS */
4209 put_ST_UNCHECKED(0, unop(Iop_NegF64
, get_ST(0)));
4212 case 0xE1: /* FABS */
4214 put_ST_UNCHECKED(0, unop(Iop_AbsF64
, get_ST(0)));
4217 case 0xE4: /* FTST */
4219 /* This forces C1 to zero, which isn't right. */
4220 /* Well, in fact the Intel docs say (bizarrely): "C1 is
4221 set to 0 if stack underflow occurred; otherwise, set
4222 to 0" which is pretty nonsensical. I guess it's a
4229 IRExpr_Const(IRConst_F64i(0x0ULL
))),
4235 case 0xE5: { /* FXAM */
4236 /* This is an interesting one. It examines %st(0),
4237 regardless of whether the tag says it's empty or not.
4238 Here, just pass both the tag (in our format) and the
4239 value (as a double, actually a ULong) to a helper
4242 = mkIRExprVec_2( unop(Iop_8Uto32
, get_ST_TAG(0)),
4243 unop(Iop_ReinterpF64asI64
,
4244 get_ST_UNCHECKED(0)) );
4245 put_C3210(mkIRExprCCall(
4248 "x86g_calculate_FXAM", &x86g_calculate_FXAM
,
4255 case 0xE8: /* FLD1 */
4258 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
4259 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL
)));
4262 case 0xE9: /* FLDL2T */
4265 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
4266 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL
)));
4269 case 0xEA: /* FLDL2E */
4272 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
4273 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL
)));
4276 case 0xEB: /* FLDPI */
4279 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
4280 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL
)));
4283 case 0xEC: /* FLDLG2 */
4286 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
4287 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL
)));
4290 case 0xED: /* FLDLN2 */
4293 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
4294 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL
)));
4297 case 0xEE: /* FLDZ */
4300 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
4301 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL
)));
4304 case 0xF0: /* F2XM1 */
4308 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4312 case 0xF1: /* FYL2X */
4316 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4322 case 0xF2: { /* FPTAN */
4324 IRTemp argD
= newTemp(Ity_F64
);
4325 assign(argD
, get_ST(0));
4326 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
4327 IRTemp resD
= newTemp(Ity_F64
);
4332 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4336 put_ST_UNCHECKED(0, mkexpr(resD
));
4337 /* Conditionally push 1.0 on the stack, if the arg is
4339 maybe_fp_push(argOK
);
4340 maybe_put_ST(argOK
, 0,
4341 IRExpr_Const(IRConst_F64(1.0)));
4342 set_C2( binop(Iop_Xor32
,
4343 unop(Iop_1Uto32
, mkexpr(argOK
)),
         case 0xF3: /* FPATAN */
                          get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */

         case 0xF4: { /* FXTRACT */
            IRTemp argF = newTemp(Ity_F64);
            IRTemp sigF = newTemp(Ity_F64);
            IRTemp expF = newTemp(Ity_F64);
            IRTemp argI = newTemp(Ity_I64);
            IRTemp sigI = newTemp(Ity_I64);
            IRTemp expI = newTemp(Ity_I64);

            assign( argF, get_ST(0) );
            assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
                  Ity_I64, 0/*regparms*/,
                  "x86amd64g_calculate_FXTRACT",
                  &x86amd64g_calculate_FXTRACT,
                  mkIRExprVec_2( mkexpr(argI),
                                 mkIRExpr_HWord(0)/*sig*/ ))
                  Ity_I64, 0/*regparms*/,
                  "x86amd64g_calculate_FXTRACT",
                  &x86amd64g_calculate_FXTRACT,
                  mkIRExprVec_2( mkexpr(argI),
                                 mkIRExpr_HWord(1)/*exp*/ ))
            assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
            assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
            put_ST_UNCHECKED(0, mkexpr(expF) );
            put_ST(0, mkexpr(sigF) );
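            /* Net result, per the FXTRACT spec: if the original
               %st(0) was sig * 2^exp, then afterwards %st(1) holds
               exp and %st(0) holds sig, both as doubles.  The helper
               is called twice on the same bit-image of the argument,
               once for each half of the result. */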
         case 0xF5: { /* FPREM1 -- IEEE compliant */
            IRTemp a1 = newTemp(Ity_F64);
            IRTemp a2 = newTemp(Ity_F64);

            /* Do FPREM1 twice, once to get the remainder, and once
               to get the C3210 flag values. */
            assign( a1, get_ST(0) );
            assign( a2, get_ST(1) );
                          get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                      triop(Iop_PRem1C3210F64,
                            get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */

         case 0xF7: /* FINCSTP */
            put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );

         case 0xF8: { /* FPREM -- not IEEE compliant */
            IRTemp a1 = newTemp(Ity_F64);
            IRTemp a2 = newTemp(Ity_F64);

            /* Do FPREM twice, once to get the remainder, and once
               to get the C3210 flag values. */
            assign( a1, get_ST(0) );
            assign( a2, get_ST(1) );
                          get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                      triop(Iop_PRemC3210F64,
                            get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */

         case 0xF9: /* FYL2XP1 */
                     triop(Iop_Yl2xp1F64,
                           get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */

         case 0xFA: /* FSQRT */
                          get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */

         case 0xFB: { /* FSINCOS */
            IRTemp argD = newTemp(Ity_F64);
            assign(argD, get_ST(0));
            IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
            IRTemp resD = newTemp(Ity_F64);
                          get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
            put_ST_UNCHECKED(0, mkexpr(resD));
            /* Conditionally push the cos value on the stack, if
               the arg is in range */
            maybe_fp_push(argOK);
            maybe_put_ST(argOK, 0,
                  get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
            set_C2( binop(Iop_Xor32,
                          unop(Iop_1Uto32, mkexpr(argOK)),
                          mkU32(1)) );

         case 0xFC: /* FRNDINT */
                  binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );

         case 0xFD: /* FSCALE */
                          get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */

         case 0xFE: /* FSIN */
         case 0xFF: { /* FCOS */
            Bool isSIN = modrm == 0xFE;
            DIP("%s\n", isSIN ? "fsin" : "fcos");
            IRTemp argD = newTemp(Ity_F64);
            assign(argD, get_ST(0));
            IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
            IRTemp resD = newTemp(Ity_F64);
                  binop(isSIN ? Iop_SinF64 : Iop_CosF64,
                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
            put_ST_UNCHECKED(0, mkexpr(resD));
            set_C2( binop(Iop_Xor32,
                          unop(Iop_1Uto32, mkexpr(argOK)),
                          mkU32(1)) );
   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */

   if (first_opcode == 0xDA) {

         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
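         /* For example, a modRM byte of 0x1D (binary 00 011 101) has
            opcode-extension field 3, selecting the FICOMP case below,
            while its mod and r/m fields (00, 101) specify a simple
            disp32 memory operand. */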
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );

         switch (gregOfRM(modrm)) {

            case 0: /* FIADD m32int */ /* ST(0) += m32int */
               DIP("fiaddl %s\n", dis_buf);

            case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
               DIP("fimull %s\n", dis_buf);

            case 2: /* FICOM m32int */
               DIP("ficoml %s\n", dis_buf);
               /* This forces C1 to zero, which isn't right. */
                                    loadLE(Ity_I32,mkexpr(addr)))),

            case 3: /* FICOMP m32int */
               DIP("ficompl %s\n", dis_buf);
               /* This forces C1 to zero, which isn't right. */
                                    loadLE(Ity_I32,mkexpr(addr)))),

            case 4: /* FISUB m32int */ /* ST(0) -= m32int */
               DIP("fisubl %s\n", dis_buf);

            case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
               DIP("fisubrl %s\n", dis_buf);

            case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
               DIP("fidivl %s\n", dis_buf);

            case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
               DIP("fidivrl %s\n", dis_buf);
                            get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                            loadLE(Ity_I32, mkexpr(addr)))));
                            get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                            loadLE(Ity_I32, mkexpr(addr))),

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n",
                          (UInt)gregOfRM(modrm));
               vex_printf("first_opcode == 0xDA\n");

            case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
               r_src = (UInt)modrm - 0xC0;
               DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
                           mk_x86g_calculate_condition(X86CondB),
                           get_ST(r_src), get_ST(0)) );

            case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
               r_src = (UInt)modrm - 0xC8;
               DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
                           mk_x86g_calculate_condition(X86CondZ),
                           get_ST(r_src), get_ST(0)) );

            case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
               r_src = (UInt)modrm - 0xD0;
               DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
                           mk_x86g_calculate_condition(X86CondBE),
                           get_ST(r_src), get_ST(0)) );

            case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
               r_src = (UInt)modrm - 0xD8;
               DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
                           mk_x86g_calculate_condition(X86CondP),
                           get_ST(r_src), get_ST(0)) );

            case 0xE9: /* FUCOMPP %st(0),%st(1) */
               DIP("fucompp %%st(0),%%st(1)\n");
               /* This forces C1 to zero, which isn't right. */
                        binop(Iop_CmpF64, get_ST(0), get_ST(1)),
   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */

   if (first_opcode == 0xDB) {

         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );

         switch (gregOfRM(modrm)) {

            case 0: /* FILD m32int */
               DIP("fildl %s\n", dis_buf);
               put_ST(0, unop(Iop_I32StoF64,
                              loadLE(Ity_I32, mkexpr(addr))));

            case 1: /* FISTTPL m32 (SSE3) */
               DIP("fisttpl %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );

            case 2: /* FIST m32 */
               DIP("fistl %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );

            case 3: /* FISTP m32 */
               DIP("fistpl %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );

            case 5: { /* FLD extended-real */
               /* Uses dirty helper:
                     ULong x86g_loadF80le ( UInt )
                  addr holds the address.  First, do a dirty call to
                  get hold of the data. */
               IRTemp   val  = newTemp(Ity_I64);
               IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );

               IRDirty* d = unsafeIRDirty_1_N (
                               "x86g_dirtyhelper_loadF80le",
                               &x86g_dirtyhelper_loadF80le,

               /* declare that we're reading memory */
               d->mAddr = mkexpr(addr);

               /* execute the dirty call, dumping the result in val. */
               stmt( IRStmt_Dirty(d) );

               put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));

               DIP("fldt %s\n", dis_buf);

            case 7: { /* FSTP extended-real */
               /* Uses dirty helper: void x86g_storeF80le ( UInt, ULong ) */
                  = mkIRExprVec_2( mkexpr(addr),
                                   unop(Iop_ReinterpF64asI64, get_ST(0)) );

               IRDirty* d = unsafeIRDirty_0_N (
                               "x86g_dirtyhelper_storeF80le",
                               &x86g_dirtyhelper_storeF80le,

               /* declare we're writing memory */
               d->mAddr = mkexpr(addr);

               /* execute the dirty call. */
               stmt( IRStmt_Dirty(d) );
4780 DIP("fstpt\n %s", dis_buf
);
            default:
               vex_printf("unhandled opc_aux = 0x%2x\n",
                          (UInt)gregOfRM(modrm));
               vex_printf("first_opcode == 0xDB\n");

            case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
               r_src = (UInt)modrm - 0xC0;
               DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
                           mk_x86g_calculate_condition(X86CondNB),
                           get_ST(r_src), get_ST(0)) );

            case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
               r_src = (UInt)modrm - 0xC8;
               DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
                           mk_x86g_calculate_condition(X86CondNZ),
                           get_ST(r_src), get_ST(0)) );

            case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
               r_src = (UInt)modrm - 0xD0;
               DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
                           mk_x86g_calculate_condition(X86CondNBE),
                           get_ST(r_src), get_ST(0)) );

            case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
               r_src = (UInt)modrm - 0xD8;
               DIP("fcmovnu %%st(%u), %%st(0)\n", r_src);
                           mk_x86g_calculate_condition(X86CondNP),
                           get_ST(r_src), get_ST(0)) );

               /* Uses dirty helper:
                     void x86g_do_FINIT ( VexGuestX86State* ) */
               IRDirty* d = unsafeIRDirty_0_N (
                               "x86g_dirtyhelper_FINIT",
                               &x86g_dirtyhelper_FINIT,
                               mkIRExprVec_1(IRExpr_GSPTR())

               /* declare we're writing guest state */
               vex_bzero(&d->fxState, sizeof(d->fxState));

               d->fxState[0].fx     = Ifx_Write;
               d->fxState[0].offset = OFFB_FTOP;
               d->fxState[0].size   = sizeof(UInt);

               d->fxState[1].fx     = Ifx_Write;
               d->fxState[1].offset = OFFB_FPREGS;
               d->fxState[1].size   = 8 * sizeof(ULong);

               d->fxState[2].fx     = Ifx_Write;
               d->fxState[2].offset = OFFB_FPTAGS;
               d->fxState[2].size   = 8 * sizeof(UChar);

               d->fxState[3].fx     = Ifx_Write;
               d->fxState[3].offset = OFFB_FPROUND;
               d->fxState[3].size   = sizeof(UInt);

               d->fxState[4].fx     = Ifx_Write;
               d->fxState[4].offset = OFFB_FC3210;
               d->fxState[4].size   = sizeof(UInt);

               stmt( IRStmt_Dirty(d) );

            case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
               fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );

            case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
               fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */

   if (first_opcode == 0xDC) {

         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );

         switch (gregOfRM(modrm)) {

            case 0: /* FADD double-real */
               fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );

            case 1: /* FMUL double-real */
               fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );

            case 2: /* FCOM double-real */
               DIP("fcoml %s\n", dis_buf);
               /* This forces C1 to zero, which isn't right. */
                                 loadLE(Ity_F64,mkexpr(addr))),

            case 3: /* FCOMP double-real */
               DIP("fcompl %s\n", dis_buf);
               /* This forces C1 to zero, which isn't right. */
                                 loadLE(Ity_F64,mkexpr(addr))),

            case 4: /* FSUB double-real */
               fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );

            case 5: /* FSUBR double-real */
               fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );

            case 6: /* FDIV double-real */
               fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );

            case 7: /* FDIVR double-real */
               fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n",
                          (UInt)gregOfRM(modrm));
               vex_printf("first_opcode == 0xDC\n");

            case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
               fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );

            case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
               fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );

            case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
               fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );

            case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
               fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );

            case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
               fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );

            case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
               fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */

   if (first_opcode == 0xDD) {

         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );

         switch (gregOfRM(modrm)) {

            case 0: /* FLD double-real */
               DIP("fldl %s\n", dis_buf);
               put_ST(0, loadLE(Ity_F64, mkexpr(addr)));

            case 1: /* FISTTPQ m64 (SSE3) */
5016 DIP("fistppll %s\n", dis_buf
);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );

            case 2: /* FST double-real */
               DIP("fstl %s\n", dis_buf);
               storeLE(mkexpr(addr), get_ST(0));

            case 3: /* FSTP double-real */
               DIP("fstpl %s\n", dis_buf);
               storeLE(mkexpr(addr), get_ST(0));

            case 4: { /* FRSTOR m108 */
               /* Uses dirty helper:
                     VexEmNote x86g_do_FRSTOR ( VexGuestX86State*, Addr32 ) */
               IRTemp   ew = newTemp(Ity_I32);
               IRDirty* d  = unsafeIRDirty_0_N (
                                "x86g_dirtyhelper_FRSTOR",
                                &x86g_dirtyhelper_FRSTOR,
                                mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )

               /* declare we're reading memory */
               d->mAddr = mkexpr(addr);

               /* declare we're writing guest state */
               vex_bzero(&d->fxState, sizeof(d->fxState));

               d->fxState[0].fx     = Ifx_Write;
               d->fxState[0].offset = OFFB_FTOP;
               d->fxState[0].size   = sizeof(UInt);

               d->fxState[1].fx     = Ifx_Write;
               d->fxState[1].offset = OFFB_FPREGS;
               d->fxState[1].size   = 8 * sizeof(ULong);

               d->fxState[2].fx     = Ifx_Write;
               d->fxState[2].offset = OFFB_FPTAGS;
               d->fxState[2].size   = 8 * sizeof(UChar);

               d->fxState[3].fx     = Ifx_Write;
               d->fxState[3].offset = OFFB_FPROUND;
               d->fxState[3].size   = sizeof(UInt);

               d->fxState[4].fx     = Ifx_Write;
               d->fxState[4].offset = OFFB_FC3210;
               d->fxState[4].size   = sizeof(UInt);

               stmt( IRStmt_Dirty(d) );

               /* ew contains any emulation warning we may need to
                  issue.  If needed, side-exit to the next insn,
                  reporting the warning, so that Valgrind's dispatcher
                  sees the warning. */
               put_emwarn( mkexpr(ew) );
                     binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
                     IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta ),

               DIP("frstor %s\n", dis_buf);

            case 6: { /* FNSAVE m108 */
               /* Uses dirty helper:
                     void x86g_do_FSAVE ( VexGuestX86State*, UInt ) */
               IRDirty* d = unsafeIRDirty_0_N (
                               "x86g_dirtyhelper_FSAVE",
                               &x86g_dirtyhelper_FSAVE,
                               mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )

               /* declare we're writing memory */
               d->mAddr = mkexpr(addr);

               /* declare we're reading guest state */
               vex_bzero(&d->fxState, sizeof(d->fxState));

               d->fxState[0].fx     = Ifx_Read;
               d->fxState[0].offset = OFFB_FTOP;
               d->fxState[0].size   = sizeof(UInt);

               d->fxState[1].fx     = Ifx_Read;
               d->fxState[1].offset = OFFB_FPREGS;
               d->fxState[1].size   = 8 * sizeof(ULong);

               d->fxState[2].fx     = Ifx_Read;
               d->fxState[2].offset = OFFB_FPTAGS;
               d->fxState[2].size   = 8 * sizeof(UChar);

               d->fxState[3].fx     = Ifx_Read;
               d->fxState[3].offset = OFFB_FPROUND;
               d->fxState[3].size   = sizeof(UInt);

               d->fxState[4].fx     = Ifx_Read;
               d->fxState[4].offset = OFFB_FC3210;
               d->fxState[4].size   = sizeof(UInt);

               stmt( IRStmt_Dirty(d) );

               DIP("fnsave %s\n", dis_buf);

            case 7: { /* FNSTSW m16 */
               IRExpr* sw = get_FPU_sw();
               vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
               storeLE( mkexpr(addr), sw );
               DIP("fnstsw %s\n", dis_buf);

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n",
                          (UInt)gregOfRM(modrm));
               vex_printf("first_opcode == 0xDD\n");
            case 0xC0 ... 0xC7: /* FFREE %st(?) */
               r_dst = (UInt)modrm - 0xC0;
               DIP("ffree %%st(%u)\n", r_dst);
               put_ST_TAG ( r_dst, mkU8(0) );

            case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xD0;
               DIP("fst %%st(0),%%st(%u)\n", r_dst);
               /* P4 manual says: "If the destination operand is a
                  non-empty register, the invalid-operation exception
                  is not generated."  Hence put_ST_UNCHECKED. */
               put_ST_UNCHECKED(r_dst, get_ST(0));

            case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xD8;
               DIP("fstp %%st(0),%%st(%u)\n", r_dst);
               /* P4 manual says: "If the destination operand is a
                  non-empty register, the invalid-operation exception
                  is not generated."  Hence put_ST_UNCHECKED. */
               put_ST_UNCHECKED(r_dst, get_ST(0));

            case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xE0;
               DIP("fucom %%st(0),%%st(%u)\n", r_dst);
               /* This forces C1 to zero, which isn't right. */
                        binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),

            case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xE8;
               DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
               /* This forces C1 to zero, which isn't right. */
                        binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */

   if (first_opcode == 0xDE) {

         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );

         switch (gregOfRM(modrm)) {

            case 0: /* FIADD m16int */ /* ST(0) += m16int */
               DIP("fiaddw %s\n", dis_buf);

            case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
               DIP("fimulw %s\n", dis_buf);

            case 2: /* FICOM m16int */
               DIP("ficomw %s\n", dis_buf);
               /* This forces C1 to zero, which isn't right. */
                                          loadLE(Ity_I16,mkexpr(addr))))),

            case 3: /* FICOMP m16int */
               DIP("ficompw %s\n", dis_buf);
               /* This forces C1 to zero, which isn't right. */
                                          loadLE(Ity_I16,mkexpr(addr))))),

            case 4: /* FISUB m16int */ /* ST(0) -= m16int */
               DIP("fisubw %s\n", dis_buf);

            case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
               DIP("fisubrw %s\n", dis_buf);
            case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
               DIP("fidivw %s\n", dis_buf);
            case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
               DIP("fidivrw %s\n", dis_buf);
                            get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                            loadLE(Ity_I16, mkexpr(addr))))));
                            get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                            loadLE(Ity_I16, mkexpr(addr)))),

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n",
                          (UInt)gregOfRM(modrm));
               vex_printf("first_opcode == 0xDE\n");

            case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
               fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );

            case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
               fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
            case 0xD9: /* FCOMPP %st(0),%st(1) */
               DIP("fcompp %%st(0),%%st(1)\n");
               /* This forces C1 to zero, which isn't right. */
                        binop(Iop_CmpF64, get_ST(0), get_ST(1)),

            case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
               fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );

            case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
               fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );

            case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
               fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );

            case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
               fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
   /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */

   if (first_opcode == 0xDF) {

         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         IRTemp addr = disAMode( &len, sorb, delta, dis_buf );

         switch (gregOfRM(modrm)) {

            case 0: /* FILD m16int */
               DIP("fildw %s\n", dis_buf);
               put_ST(0, unop(Iop_I32StoF64,
                                   loadLE(Ity_I16, mkexpr(addr)))));

            case 1: /* FISTTPS m16 (SSE3) */
               DIP("fisttps %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI16S, mkU32(Irrm_ZERO), get_ST(0)) );
            case 2: /* FIST m16 */
               DIP("fists %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );

            case 3: /* FISTP m16 */
               DIP("fistps %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );

            case 5: /* FILD m64 */
               DIP("fildll %s\n", dis_buf);
               put_ST(0, binop(Iop_I64StoF64,
                               loadLE(Ity_I64, mkexpr(addr))));

            case 7: /* FISTP m64 */
               DIP("fistpll %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );

            default:
               vex_printf("unhandled opc_aux = 0x%2x\n",
                          (UInt)gregOfRM(modrm));
               vex_printf("first_opcode == 0xDF\n");

            case 0xC0: /* FFREEP %st(0) */
               DIP("ffreep %%st(%d)\n", 0);
               put_ST_TAG ( 0, mkU8(0) );

            case 0xE0: /* FNSTSW %ax */
               DIP("fnstsw %%ax\n");
               /* Get the FPU status word value and dump it in %AX. */
               if (0) {
                  /* The obvious thing to do is simply dump the 16-bit
                     status word value in %AX.  However, due to a
                     limitation in Memcheck's origin tracking
                     machinery, this causes Memcheck not to track the
                     origin of any undefinedness into %AH (only into
                     %AL/%AX/%EAX), which means origins are lost in
                     the sequence "fnstsw %ax; test $M,%ah; jcond .." */
                  putIReg(2, R_EAX, get_FPU_sw());
               } else {
                  /* So a somewhat lame kludge is to make it very
                     clear to Memcheck that the value is written to
                     both %AH and %AL.  This generates marginally
                     worse code, but I don't think it matters much. */
                  IRTemp t16 = newTemp(Ity_I16);
                  assign(t16, get_FPU_sw());
                  putIReg( 1, R_AL, unop(Iop_16to8,   mkexpr(t16)) );
                  putIReg( 1, R_AH, unop(Iop_16HIto8, mkexpr(t16)) );
               }

            case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
               fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );

            case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
               /* not really right since COMIP != UCOMIP */
               fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );

   vpanic("dis_FPU(x86): invalid primary opcode");
/*------------------------------------------------------------*/
/*--- MMX INSTRUCTIONS                                     ---*/
/*------------------------------------------------------------*/

/* Effect of MMX insns on x87 FPU state (table 11-2 of
   IA32 arch manual, volume 3):

   Read from, or write to MMX register (viz, any insn except EMMS):
   * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
   * FP stack pointer set to zero

   EMMS:
   * All tags set to Invalid (empty) -- FPTAGS[i] := zero
   * FP stack pointer set to zero
*/

static void do_MMX_preamble ( void )
{
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   IRExpr*     zero  = mkU32(0);
   IRExpr*     tag1  = mkU8(1);

   for (i = 0; i < 8; i++)
      stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
}

static void do_EMMS_preamble ( void )
{
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   IRExpr*     zero  = mkU32(0);
   IRExpr*     tag0  = mkU8(0);

   for (i = 0; i < 8; i++)
      stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
}
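/* Viewed operationally: each PutI above uses a constant index of
   zero with bias i, so iteration i writes FPTAGS[i].  Hence
   do_MMX_preamble leaves FPTAGS[0..7] all 1 (every register valid)
   and do_EMMS_preamble leaves them all 0 (every register empty),
   matching the two rows of the table above. */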
static IRExpr* getMMXReg ( UInt archreg )
{
   vassert(archreg < 8);
   return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
}

static void putMMXReg ( UInt archreg, IRExpr* e )
{
   vassert(archreg < 8);
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
}
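/* These two accessors encode the architectural aliasing of the MMX
   registers onto the x87 register file: MMX register i is the low 64
   bits of FP register i, so both are addressed as an Ity_I64 at
   OFFB_FPREGS + 8 * i. */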
/* Helper for non-shift MMX insns.  Note this is incomplete in the
   sense that it does not first call do_MMX_preamble() -- that is the
   responsibility of its caller. */

static UInt dis_MMXop_regmem_to_reg ( UChar  sorb,
                                      Bool   show_granularity )
{
   UChar   modrm = getIByte(delta);
   Bool    isReg = epartIsReg(modrm);
   IRExpr* argL  = NULL;
   IRExpr* argR  = NULL;
   IRExpr* argG  = NULL;
   IRExpr* argE  = NULL;
   IRTemp  res   = newTemp(Ity_I64);

   IROp    op    = Iop_INVALID;

   const HChar* hName = NULL;

#  define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)

      /* Original MMX ones */
      case 0xFC: op = Iop_Add8x8; break;
      case 0xFD: op = Iop_Add16x4; break;
      case 0xFE: op = Iop_Add32x2; break;

      case 0xEC: op = Iop_QAdd8Sx8; break;
      case 0xED: op = Iop_QAdd16Sx4; break;

      case 0xDC: op = Iop_QAdd8Ux8; break;
      case 0xDD: op = Iop_QAdd16Ux4; break;

      case 0xF8: op = Iop_Sub8x8;  break;
      case 0xF9: op = Iop_Sub16x4; break;
      case 0xFA: op = Iop_Sub32x2; break;

      case 0xE8: op = Iop_QSub8Sx8; break;
      case 0xE9: op = Iop_QSub16Sx4; break;

      case 0xD8: op = Iop_QSub8Ux8; break;
      case 0xD9: op = Iop_QSub16Ux4; break;

      case 0xE5: op = Iop_MulHi16Sx4; break;
      case 0xD5: op = Iop_Mul16x4; break;
      case 0xF5: XXX(x86g_calculate_mmx_pmaddwd); break;

      case 0x74: op = Iop_CmpEQ8x8; break;
      case 0x75: op = Iop_CmpEQ16x4; break;
      case 0x76: op = Iop_CmpEQ32x2; break;

      case 0x64: op = Iop_CmpGT8Sx8; break;
      case 0x65: op = Iop_CmpGT16Sx4; break;
      case 0x66: op = Iop_CmpGT32Sx2; break;

      case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
      case 0x63: op = Iop_QNarrowBin16Sto8Sx8;  eLeft = True; break;
      case 0x67: op = Iop_QNarrowBin16Sto8Ux8;  eLeft = True; break;

      case 0x68: op = Iop_InterleaveHI8x8;  eLeft = True; break;
      case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
      case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;

      case 0x60: op = Iop_InterleaveLO8x8;  eLeft = True; break;
      case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
      case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;

      case 0xDB: op = Iop_And64; break;
      case 0xDF: op = Iop_And64; invG = True; break;
      case 0xEB: op = Iop_Or64; break;
      case 0xEF: /* Possibly do better here if argL and argR are the
                    same reg */
                 op = Iop_Xor64; break;

      /* Introduced in SSE1 */
      case 0xE0: op = Iop_Avg8Ux8;    break;
      case 0xE3: op = Iop_Avg16Ux4;   break;
      case 0xEE: op = Iop_Max16Sx4;   break;
      case 0xDE: op = Iop_Max8Ux8;    break;
      case 0xEA: op = Iop_Min16Sx4;   break;
      case 0xDA: op = Iop_Min8Ux8;    break;
      case 0xE4: op = Iop_MulHi16Ux4; break;
      case 0xF6: XXX(x86g_calculate_mmx_psadbw); break;

      /* Introduced in SSE2 */
      case 0xD4: op = Iop_Add64; break;
      case 0xFB: op = Iop_Sub64; break;

      default:
         vex_printf("\n0x%x\n", opc);
         vpanic("dis_MMXop_regmem_to_reg");

   argG = getMMXReg(gregOfRM(modrm));
      argG = unop(Iop_Not64, argG);

      argE = getMMXReg(eregOfRM(modrm));
      IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
      argE = loadLE(Ity_I64, mkexpr(addr));

   if (op != Iop_INVALID) {
      vassert(hName == NULL);
      vassert(hAddr == NULL);
      assign(res, binop(op, argL, argR));
      vassert(hName != NULL);
      vassert(hAddr != NULL);
                       0/*regparms*/, hName, hAddr,
                       mkIRExprVec_2( argL, argR )

   putMMXReg( gregOfRM(modrm), mkexpr(res) );

   DIP("%s%s %s, %s\n",
       name, show_granularity ? nameMMXGran(opc & 3) : "",
       ( isReg ? nameMMXReg(eregOfRM(modrm)) : dis_buf ),
       nameMMXReg(gregOfRM(modrm)) );
/* Vector by scalar shift of G by the amount specified at the bottom
   of E.  This is a straight copy of dis_SSE_shiftG_byE. */

static UInt dis_MMX_shiftG_byE ( UChar sorb, Int delta,
                                 const HChar* opname, IROp op )
{
   UChar  rm   = getIByte(delta);
   IRTemp g0   = newTemp(Ity_I64);
   IRTemp g1   = newTemp(Ity_I64);
   IRTemp amt  = newTemp(Ity_I32);
   IRTemp amt8 = newTemp(Ity_I8);

   if (epartIsReg(rm)) {
      assign( amt, unop(Iop_64to32, getMMXReg(eregOfRM(rm))) );
      DIP("%s %s,%s\n", opname,
                        nameMMXReg(eregOfRM(rm)),
                        nameMMXReg(gregOfRM(rm)) );
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
      DIP("%s %s,%s\n", opname,
                        nameMMXReg(gregOfRM(rm)) );

   assign( g0,   getMMXReg(gregOfRM(rm)) );
   assign( amt8, unop(Iop_32to8, mkexpr(amt)) );

   shl = shr = sar = False;

      case Iop_ShlN16x4: shl = True; size = 32; break;
      case Iop_ShlN32x2: shl = True; size = 32; break;
      case Iop_Shl64:    shl = True; size = 64; break;
      case Iop_ShrN16x4: shr = True; size = 16; break;
      case Iop_ShrN32x2: shr = True; size = 32; break;
      case Iop_Shr64:    shr = True; size = 64; break;
      case Iop_SarN16x4: sar = True; size = 16; break;
      case Iop_SarN32x2: sar = True; size = 32; break;
      default: vassert(0);

         binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
         binop(op, mkexpr(g0), mkexpr(amt8)),

         binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
         binop(op, mkexpr(g0), mkexpr(amt8)),
         binop(op, mkexpr(g0), mkU8(size-1))

   putMMXReg( gregOfRM(rm), mkexpr(g1) );
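/* The guard logic above implements x86's treatment of out-of-range
   vector shift counts: when amt >= size the CmpLT32U test fails, and
   the result is forced -- to zero for shl/shr, and to a shift by
   size-1 (replicating the sign bit) for sar -- rather than letting
   an undefined IR shift amount through. */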
/* Vector by scalar shift of E by an immediate byte.  This is a
   straight copy of dis_SSE_shiftE_imm. */

static UInt dis_MMX_shiftE_imm ( Int delta, const HChar* opname, IROp op )
{
   UChar  rm = getIByte(delta);
   IRTemp e0 = newTemp(Ity_I64);
   IRTemp e1 = newTemp(Ity_I64);

   vassert(epartIsReg(rm));
   vassert(gregOfRM(rm) == 2
           || gregOfRM(rm) == 4 || gregOfRM(rm) == 6);
   amt = getIByte(delta+1);

   DIP("%s $%d,%s\n", opname,
                      nameMMXReg(eregOfRM(rm)) );

   assign( e0, getMMXReg(eregOfRM(rm)) );

   shl = shr = sar = False;

      case Iop_ShlN16x4: shl = True; size = 16; break;
      case Iop_ShlN32x2: shl = True; size = 32; break;
      case Iop_Shl64:    shl = True; size = 64; break;
      case Iop_SarN16x4: sar = True; size = 16; break;
      case Iop_SarN32x2: sar = True; size = 32; break;
      case Iop_ShrN16x4: shr = True; size = 16; break;
      case Iop_ShrN32x2: shr = True; size = 32; break;
      case Iop_Shr64:    shr = True; size = 64; break;
      default: vassert(0);

      assign( e1, amt >= size
                     ? mkU64(0)
                     : binop(op, mkexpr(e0), mkU8(amt))

      assign( e1, amt >= size
                     ? binop(op, mkexpr(e0), mkU8(size-1))
                     : binop(op, mkexpr(e0), mkU8(amt))

   putMMXReg( eregOfRM(rm), mkexpr(e1) );
/* Completely handle all MMX instructions except emms. */

static UInt dis_MMX ( Bool* decode_ok, UChar sorb, Int sz, Int delta )
{
   UChar opc = getIByte(delta);

   /* dis_MMX handles all insns except emms. */

         /* MOVD (src)ireg-or-mem (E), (dst)mmxreg (G)*/
         if (sz != 4)
            goto mmx_decode_failure;
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) {
                       binop( Iop_32HLto64,
                              getIReg(4, eregOfRM(modrm)) ) );
            DIP("movd %s, %s\n",
                nameIReg(4,eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm)));
            IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
                          binop( Iop_32HLto64,
                                 loadLE(Ity_I32, mkexpr(addr)) ) );
            DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregOfRM(modrm)));

      case 0x7E: /* MOVD (src)mmxreg (G), (dst)ireg-or-mem (E) */
         if (sz != 4)
            goto mmx_decode_failure;
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) {
            putIReg( 4, eregOfRM(modrm),
                     unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) );
            DIP("movd %s, %s\n",
                nameMMXReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm)));
            IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
            storeLE( mkexpr(addr),
                     unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) );
            DIP("movd %s, %s\n", nameMMXReg(gregOfRM(modrm)), dis_buf);

         /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) {
            putMMXReg( gregOfRM(modrm), getMMXReg(eregOfRM(modrm)) );
            DIP("movq %s, %s\n",
                nameMMXReg(eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm)));
            IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
            putMMXReg( gregOfRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movq %s, %s\n",
                dis_buf, nameMMXReg(gregOfRM(modrm)));

         /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
         if (sz != 4)
            goto mmx_decode_failure;
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) {
            putMMXReg( eregOfRM(modrm), getMMXReg(gregOfRM(modrm)) );
            DIP("movq %s, %s\n",
                nameMMXReg(gregOfRM(modrm)), nameMMXReg(eregOfRM(modrm)));
            IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
            storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
            DIP("mov(nt)q %s, %s\n",
                nameMMXReg(gregOfRM(modrm)), dis_buf);

      case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padd", True );

      case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padds", True );

      case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "paddus", True );

      case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psub", True );

      case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubs", True );

      case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubus", True );

      case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmulhw", False );

      case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmullw", False );

      case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmaddwd", False );

      case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpeq", True );

      case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpgt", True );

      case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packssdw", False );

      case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packsswb", False );

      case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packuswb", False );

      case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckh", True );

      case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckl", True );

      case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pand", False );

      case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pandn", False );

      case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "por", False );

      case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
         if (sz != 4)
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pxor", False );

#     define SHIFT_BY_REG(_name,_op)                                 \
                delta = dis_MMX_shiftG_byE(sorb, delta, _name, _op); \

      /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
      case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
      case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);

      /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
      case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
      case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);

      /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
      case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);

#     undef SHIFT_BY_REG

         /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
         UChar byte2, subopc;
         if (sz != 4)
            goto mmx_decode_failure;
         byte2  = getIByte(delta);          /* amode / sub-opcode */
         subopc = toUChar( (byte2 >> 3) & 7 );

#        define SHIFT_BY_IMM(_name,_op)                       \
            do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \

         if (subopc == 2 /*SRL*/ && opc == 0x71)
            SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
         else if (subopc == 2 /*SRL*/ && opc == 0x72)
            SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
         else if (subopc == 2 /*SRL*/ && opc == 0x73)
            SHIFT_BY_IMM("psrlq", Iop_Shr64);
         else if (subopc == 4 /*SAR*/ && opc == 0x71)
            SHIFT_BY_IMM("psraw", Iop_SarN16x4);
         else if (subopc == 4 /*SAR*/ && opc == 0x72)
            SHIFT_BY_IMM("psrad", Iop_SarN32x2);
         else if (subopc == 6 /*SHL*/ && opc == 0x71)
            SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
         else if (subopc == 6 /*SHL*/ && opc == 0x72)
            SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
         else if (subopc == 6 /*SHL*/ && opc == 0x73)
            SHIFT_BY_IMM("psllq", Iop_Shl64);
         else goto mmx_decode_failure;

#        undef SHIFT_BY_IMM

         IRTemp addr    = newTemp(Ity_I32);
         IRTemp regD    = newTemp(Ity_I64);
         IRTemp regM    = newTemp(Ity_I64);
         IRTemp mask    = newTemp(Ity_I64);
         IRTemp olddata = newTemp(Ity_I64);
         IRTemp newdata = newTemp(Ity_I64);

         modrm = getIByte(delta);
         if (sz != 4 || (!epartIsReg(modrm)))
            goto mmx_decode_failure;

         assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
         assign( regM, getMMXReg( eregOfRM(modrm) ));
         assign( regD, getMMXReg( gregOfRM(modrm) ));
         assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
         assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
                               unop(Iop_Not64, mkexpr(mask)))) );
         storeLE( mkexpr(addr), mkexpr(newdata) );
         DIP("maskmovq %s,%s\n", nameMMXReg( eregOfRM(modrm) ),
                                 nameMMXReg( gregOfRM(modrm) ) );
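         /* To spell out the MASKMOVQ semantics implemented above:
            Iop_SarN8x8 by 7 replicates each byte's top bit across
            the byte, giving an all-ones or all-zero mask per lane.
            Byte i of regD then replaces the old byte at EDI+i only
            where the mask lane is all-ones; elsewhere olddata is
            kept, and the merged 64-bit value is written back with a
            single store. */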
      /* --- MMX decode failure --- */
      return delta; /* ignored */
/*------------------------------------------------------------*/
/*--- More misc arithmetic and other obscure insns.        ---*/
/*------------------------------------------------------------*/

/* Double length left and right shifts.  Apparently only required in
   v-size (no b- variant). */

static UInt dis_SHLRD_Gv_Ev ( UChar sorb,
                              Int delta, UChar modrm,
                              Bool amt_is_literal,
                              const HChar* shift_amt_txt,
   /* shift_amt :: Ity_I8 is the amount to shift.  shift_amt_txt is used
      for printing it.  And eip on entry points at the modrm byte. */
   IRType ty       = szToITy(sz);
   IRTemp gsrc     = newTemp(ty);
   IRTemp esrc     = newTemp(ty);
   IRTemp addr     = IRTemp_INVALID;
   IRTemp tmpSH    = newTemp(Ity_I8);
   IRTemp tmpL     = IRTemp_INVALID;
   IRTemp tmpRes   = IRTemp_INVALID;
   IRTemp tmpSubSh = IRTemp_INVALID;

   IRExpr* mask    = NULL;

   vassert(sz == 2 || sz == 4);

   /* The E-part is the destination; this is shifted.  The G-part
      supplies bits to be shifted into the E-part, but is not
      changed.

      If shifting left, form a double-length word with E at the top
      and G at the bottom, and shift this left.  The result is then in
      the high part.

      If shifting right, form a double-length word with G at the top
      and E at the bottom, and shift this right.  The result is then
      in the low part. */
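   /* A worked example: for "shldl $4, %ebx, %eax" (sz == 4, shifting
      left), the 64-bit value %eax:%ebx is shifted left by 4 and the
      new %eax is the high 32 bits of the result -- the top 4 bits of
      %eax are shifted out and the top 4 bits of %ebx arrive at the
      bottom of %eax.  The right-shift case is symmetrical, with
      %ebx:%eax shifted right and the low half kept. */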
   /* Fetch the operands. */

   assign( gsrc, getIReg(sz, gregOfRM(modrm)) );

   if (epartIsReg(modrm)) {
      assign( esrc, getIReg(sz, eregOfRM(modrm)) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          nameIReg(sz, gregOfRM(modrm)), nameIReg(sz, eregOfRM(modrm)));
      addr = disAMode ( &len, sorb, delta, dis_buf );
      assign( esrc, loadLE(ty, mkexpr(addr)) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          nameIReg(sz, gregOfRM(modrm)), dis_buf);

   /* Round up the relevant primops. */

      tmpL     = newTemp(Ity_I64);
      tmpRes   = newTemp(Ity_I32);
      tmpSubSh = newTemp(Ity_I32);
      mkpair   = Iop_32HLto64;
      getres   = left_shift ? Iop_64HIto32 : Iop_64to32;
      shift    = left_shift ? Iop_Shl64 : Iop_Shr64;

      tmpL     = newTemp(Ity_I32);
      tmpRes   = newTemp(Ity_I16);
      tmpSubSh = newTemp(Ity_I16);
      mkpair   = Iop_16HLto32;
      getres   = left_shift ? Iop_32HIto16 : Iop_32to16;
      shift    = left_shift ? Iop_Shl32 : Iop_Shr32;

   /* Do the shift, calculate the subshift value, and set
      the flags. */

   assign( tmpSH, binop(Iop_And8, shift_amt, mask) );

      assign( tmpL, binop(mkpair, mkexpr(esrc), mkexpr(gsrc)) );
      assign( tmpL, binop(mkpair, mkexpr(gsrc), mkexpr(esrc)) );

   assign( tmpRes, unop(getres, binop(shift, mkexpr(tmpL), mkexpr(tmpSH)) ) );
               binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),

   setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl32 : Iop_Sar32,
                              tmpRes, tmpSubSh, ty, tmpSH );

   /* Put result back. */

   if (epartIsReg(modrm)) {
      putIReg(sz, eregOfRM(modrm), mkexpr(tmpRes));
      storeLE( mkexpr(addr), mkexpr(tmpRes) );

   if (amt_is_literal) delta++;
/* Handle BT/BTS/BTR/BTC Gv, Ev.  Apparently b-size is not
   allowed. */

typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;

static const HChar* nameBtOp ( BtOp op )
{
      case BtOpNone:  return "";
      case BtOpSet:   return "s";
      case BtOpReset: return "r";
      case BtOpComp:  return "c";
      default: vpanic("nameBtOp(x86)");

static UInt dis_bt_G_E ( const VexAbiInfo* vbi,
                         UChar sorb, Bool locked, Int sz, Int delta, BtOp op )
{
   IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
          t_addr1, t_esp, t_mask, t_new;

   vassert(sz == 2 || sz == 4);

   t_fetched = t_bitno0 = t_bitno1 = t_bitno2
             = t_addr0 = t_addr1 = t_esp
             = t_mask = t_new = IRTemp_INVALID;

   t_fetched = newTemp(Ity_I8);
   t_new     = newTemp(Ity_I8);
   t_bitno0  = newTemp(Ity_I32);
   t_bitno1  = newTemp(Ity_I32);
   t_bitno2  = newTemp(Ity_I8);
   t_addr1   = newTemp(Ity_I32);
   modrm     = getIByte(delta);

   assign( t_bitno0, widenSto32(getIReg(sz, gregOfRM(modrm))) );

   if (epartIsReg(modrm)) {
      /* Get it onto the client's stack. */
      t_esp   = newTemp(Ity_I32);
      t_addr0 = newTemp(Ity_I32);

      /* For the choice of the value 128, see comment in dis_bt_G_E in
         guest_amd64_toIR.c.  We point out here only that 128 is
         fast-cased in Memcheck and is > 0, so seems like a good
         choice. */
      vassert(vbi->guest_stack_redzone_size == 0);
      assign( t_esp, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(128)) );
      putIReg(4, R_ESP, mkexpr(t_esp));

      storeLE( mkexpr(t_esp), getIReg(sz, eregOfRM(modrm)) );

      /* Make t_addr0 point at it. */
      assign( t_addr0, mkexpr(t_esp) );

      /* Mask out upper bits of the shift amount, since we're doing a
         byte array access. */
      assign( t_bitno1, binop(Iop_And32,
                              mkexpr(t_bitno0),
                              mkU32(sz == 4 ? 31 : 15)) );

      t_addr0 = disAMode ( &len, sorb, delta, dis_buf );
      assign( t_bitno1, mkexpr(t_bitno0) );

   /* At this point: t_addr0 is the address being operated on.  If it
      was a reg, we will have pushed it onto the client's stack.
      t_bitno1 is the bit number, suitably masked in the case of a
      reg operand. */
                     binop(Iop_Sar32, mkexpr(t_bitno1), mkU8(3))) );

   /* t_addr1 now holds effective address */

                     binop(Iop_And32, mkexpr(t_bitno1), mkU32(7))) );

   /* t_bitno2 contains offset of bit within byte */

   if (op != BtOpNone) {
      t_mask = newTemp(Ity_I8);
      assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );

   /* t_mask is now a suitable byte mask */

   assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );

   if (op != BtOpNone) {
               binop(Iop_Or8,  mkexpr(t_fetched), mkexpr(t_mask)) );
               binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
               binop(Iop_And8, mkexpr(t_fetched),
                     unop(Iop_Not8, mkexpr(t_mask))) );
            vpanic("dis_bt_G_E(x86)");

      if (locked && !epartIsReg(modrm)) {
         casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
                mkexpr(t_new)/*new*/,
                guest_EIP_curr_instr );
         storeLE( mkexpr(t_addr1), mkexpr(t_new) );

   /* Side effect done; now get selected bit into Carry flag */
   /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
                     unop(Iop_8Uto32, mkexpr(t_fetched)),

   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

   /* Move reg operand from stack back to reg */
   if (epartIsReg(modrm)) {
      /* t_esp still points at it. */
      putIReg(sz, eregOfRM(modrm), loadLE(szToITy(sz), mkexpr(t_esp)) );
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t_esp), mkU32(128)) );

   DIP("bt%s%c %s, %s\n",
       nameBtOp(op), nameISize(sz), nameIReg(sz, gregOfRM(modrm)),
       ( epartIsReg(modrm) ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ) );
/* Handle BSF/BSR.  Only v-size seems necessary. */

static UInt dis_bs_E_G ( UChar sorb, Int sz, Int delta, Bool fwds )
{
   IRType ty    = szToITy(sz);
   IRTemp src   = newTemp(ty);
   IRTemp dst   = newTemp(ty);

   IRTemp src32 = newTemp(Ity_I32);
   IRTemp dst32 = newTemp(Ity_I32);
   IRTemp srcB  = newTemp(Ity_I1);

   vassert(sz == 4 || sz == 2);

   modrm = getIByte(delta);

   isReg = epartIsReg(modrm);
      assign( src, getIReg(sz, eregOfRM(modrm)) );
      IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
      assign( src, loadLE(ty, mkexpr(addr)) );

   DIP("bs%c%c %s, %s\n",
       fwds ? 'f' : 'r', nameISize(sz),
       ( isReg ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ),
       nameIReg(sz, gregOfRM(modrm)));

   /* Generate a bool expression which is zero iff the original is
      zero, and nonzero otherwise.  Ask for a CmpNE version which, if
      instrumented by Memcheck, is instrumented expensively, since
      this may be used on the output of a preceding movmskb insn,
      which has been known to be partially defined, and in need of
      careful handling. */
   assign( srcB, binop(mkSizedOp(ty,Iop_ExpCmpNE8),
                       mkexpr(src), mkU(ty,0)) );

   /* Flags: Z is 1 iff source value is zero.  All others
      are undefined -- we force them to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
         IRExpr_ITE( mkexpr(srcB),
                     mkU32(X86G_CC_MASK_Z)

   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

   /* Result: iff source value is zero, we can't use
      Iop_Clz32/Iop_Ctz32 as they have no defined result in that case.
      But anyway, Intel x86 semantics say the result is undefined in
      such situations.  Hence handle the zero case specially. */

   /* Bleh.  What we compute:

         bsf32:  if src == 0 then 0 else  Ctz32(src)
         bsr32:  if src == 0 then 0 else 31 - Clz32(src)

         bsf16:  if src == 0 then 0 else  Ctz32(16Uto32(src))
         bsr16:  if src == 0 then 0 else 31 - Clz32(16Uto32(src))

      First, widen src to 32 bits if it is not already.

      Postscript 15 Oct 04: it seems that at least VIA Nehemiah leaves the
      dst register unchanged when src == 0.  Hence change accordingly. */
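   /* Eg with src32 == 0x00008000: bsf computes Ctz32 == 15 and bsr
      computes 31 - Clz32 == 31 - 16 == 15, both landing on the
      single set bit.  With src == 0, the ITE below instead keeps the
      destination register's previous value, per the Nehemiah
      observation above. */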
      assign( src32, unop(Iop_16Uto32, mkexpr(src)) );
      assign( src32, mkexpr(src) );

   /* The main computation, guarding against zero. */
               fwds ? unop(Iop_Ctz32, mkexpr(src32))
                          unop(Iop_Clz32, mkexpr(src32))),
               /* src == 0 -- leave dst unchanged */
               widenUto32( getIReg( sz, gregOfRM(modrm) ) )

      assign( dst, unop(Iop_32to16, mkexpr(dst32)) );
      assign( dst, mkexpr(dst32) );

   /* dump result back */
   putIReg( sz, gregOfRM(modrm), mkexpr(dst) );
static void codegen_xchg_eAX_Reg ( Int sz, Int reg )
{
   IRType ty = szToITy(sz);
   IRTemp t1 = newTemp(ty);
   IRTemp t2 = newTemp(ty);
   vassert(sz == 2 || sz == 4);
   assign( t1, getIReg(sz, R_EAX) );
   assign( t2, getIReg(sz, reg) );
   putIReg( sz, R_EAX, mkexpr(t2) );
   putIReg( sz, reg, mkexpr(t1) );
   DIP("xchg%c %s, %s\n",
       nameISize(sz), nameIReg(sz, R_EAX), nameIReg(sz, reg));
}

static void codegen_SAHF ( void )
{
   /* Set the flags to:
      (x86g_calculate_flags_all() & X86G_CC_MASK_O)  -- retain the old O flag
      | (%AH & (X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
               |X86G_CC_MASK_P|X86G_CC_MASK_C)
   */
   UInt   mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
                       |X86G_CC_MASK_C|X86G_CC_MASK_P;
   IRTemp oldflags   = newTemp(Ity_I32);
   assign( oldflags, mk_x86g_calculate_eflags_all() );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
            binop(Iop_And32, mkexpr(oldflags), mkU32(X86G_CC_MASK_O)),
            binop(Iop_Shr32, getIReg(4, R_EAX), mkU8(8)),

   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
}

static void codegen_LAHF ( void )
{
   /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
   IRExpr* eax_with_hole;

   UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
                     |X86G_CC_MASK_C|X86G_CC_MASK_P;

   IRTemp flags = newTemp(Ity_I32);
   assign( flags, mk_x86g_calculate_eflags_all() );

   eax_with_hole
      = binop(Iop_And32, getIReg(4, R_EAX), mkU32(0xFFFF00FF));
   new_byte
      = binop(Iop_Or32, binop(Iop_And32, mkexpr(flags), mkU32(mask_SZACP)),
   new_eax
      = binop(Iop_Or32, eax_with_hole,
              binop(Iop_Shl32, new_byte, mkU8(8)));
   putIReg(4, R_EAX, new_eax);
}
static UInt dis_cmpxchg_G_E ( UChar sorb,
   IRType ty    = szToITy(size);
   IRTemp acc   = newTemp(ty);
   IRTemp src   = newTemp(ty);
   IRTemp dest  = newTemp(ty);
   IRTemp dest2 = newTemp(ty);
   IRTemp acc2  = newTemp(ty);
   IRTemp cond  = newTemp(Ity_I1);
   IRTemp addr  = IRTemp_INVALID;
   UChar  rm    = getUChar(delta0);

   /* There are 3 cases to consider:

      reg-reg: ignore any lock prefix, generate sequence based
               on ITE

      reg-mem, not locked: ignore any lock prefix, generate sequence
                           based on ITE

      reg-mem, locked: use IRCAS
   */
   if (epartIsReg(rm)) {
      assign( dest, getIReg(size, eregOfRM(rm)) );
      assign( src, getIReg(size, gregOfRM(rm)) );
      assign( acc, getIReg(size, R_EAX) );
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_x86g_calculate_condition(X86CondZ) );
      assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
      assign( acc2,  IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIReg(size, R_EAX, mkexpr(acc2));
      putIReg(size, eregOfRM(rm), mkexpr(dest2));
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIReg(size,gregOfRM(rm)),
                               nameIReg(size,eregOfRM(rm)) );
   }
   else if (!epartIsReg(rm) && !locked) {
      addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( dest, loadLE(ty, mkexpr(addr)) );
      assign( src, getIReg(size, gregOfRM(rm)) );
      assign( acc, getIReg(size, R_EAX) );
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_x86g_calculate_condition(X86CondZ) );
      assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
      assign( acc2,  IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIReg(size, R_EAX, mkexpr(acc2));
      storeLE( mkexpr(addr), mkexpr(dest2) );
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIReg(size,gregOfRM(rm)), dis_buf);
   }
   else if (!epartIsReg(rm) && locked) {
      /* src is new value.  acc is expected value.  dest is old value.
         Compute success from the output of the IRCAS, and steer the
         new value for EAX accordingly: in case of success, EAX is
         unchanged. */
      addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( src, getIReg(size, gregOfRM(rm)) );
      assign( acc, getIReg(size, R_EAX) );
         mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
                  NULL, mkexpr(acc), NULL, mkexpr(src) )
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_x86g_calculate_condition(X86CondZ) );
      assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIReg(size, R_EAX, mkexpr(acc2));
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIReg(size,gregOfRM(rm)), dis_buf);
   }
/* Handle conditional move instructions of the form
      cmovcc E(reg-or-mem), G(reg)

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %E, tmps

   If E is mem -->     (getAddr E) -> tmpa
*/
static UInt dis_cmov_E_G ( UChar sorb,
   UChar rm = getIByte(delta0);

   IRType ty   = szToITy(sz);
   IRTemp tmps = newTemp(ty);
   IRTemp tmpd = newTemp(ty);

   if (epartIsReg(rm)) {
      assign( tmps, getIReg(sz, eregOfRM(rm)) );
      assign( tmpd, getIReg(sz, gregOfRM(rm)) );

      putIReg(sz, gregOfRM(rm),
              IRExpr_ITE( mk_x86g_calculate_condition(cond),

      DIP("cmov%c%s %s,%s\n", nameISize(sz),
                              name_X86Condcode(cond),
                              nameIReg(sz,eregOfRM(rm)),
                              nameIReg(sz,gregOfRM(rm)));

   /* E refers to memory */
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( tmps, loadLE(ty, mkexpr(addr)) );
      assign( tmpd, getIReg(sz, gregOfRM(rm)) );

      putIReg(sz, gregOfRM(rm),
              IRExpr_ITE( mk_x86g_calculate_condition(cond),

      DIP("cmov%c%s %s,%s\n", nameISize(sz),
                              name_X86Condcode(cond),
                              nameIReg(sz,gregOfRM(rm)));
static UInt dis_xadd_G_E ( UChar sorb, Bool locked, Int sz, Int delta0,
   UChar rm = getIByte(delta0);

   IRType ty    = szToITy(sz);
   IRTemp tmpd  = newTemp(ty);
   IRTemp tmpt0 = newTemp(ty);
   IRTemp tmpt1 = newTemp(ty);

   /* There are 3 cases to consider:

      reg-reg: ignore any lock prefix,
               generate 'naive' (non-atomic) sequence

      reg-mem, not locked: ignore any lock prefix, generate 'naive'
                           (non-atomic) sequence

      reg-mem, locked: use IRCAS
   */

   if (epartIsReg(rm)) {
      assign( tmpd,  getIReg(sz, eregOfRM(rm)));
      assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIReg(sz, eregOfRM(rm), mkexpr(tmpt1));
      putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIReg(sz,gregOfRM(rm)),
          nameIReg(sz,eregOfRM(rm)));
   }
   else if (!epartIsReg(rm) && !locked) {
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( tmpd,  loadLE(ty, mkexpr(addr)) );
      assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      storeLE( mkexpr(addr), mkexpr(tmpt1) );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
   }
   else if (!epartIsReg(rm) && locked) {
      IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
      assign( tmpd,  loadLE(ty, mkexpr(addr)) );
      assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
             mkexpr(tmpt1)/*newVal*/, guest_EIP_curr_instr );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
   }
/* Move 16 bits from Ew (ireg or mem) to G (a segment register). */

static
UInt dis_mov_Ew_Sw ( UChar sorb, Int delta0 )
{
   Int    len;
   IRTemp addr;
   UChar  rm = getIByte(delta0);
   HChar  dis_buf[50];

   if (epartIsReg(rm)) {
      putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
      DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
      return 1+delta0;
   } else {
      addr = disAMode ( &len, sorb, delta0, dis_buf );
      putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
      DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
      return len+delta0;
   }
}
/* Move 16 bits from G (a segment register) to Ew (ireg or mem).  If
   dst is ireg and sz==4, zero out top half of it.  */

static
UInt dis_mov_Sw_Ew ( UChar sorb,
                     Int   sz,
                     Int   delta0 )
{
   Int    len;
   IRTemp addr;
   UChar  rm = getIByte(delta0);
   HChar  dis_buf[50];

   vassert(sz == 2 || sz == 4);

   if (epartIsReg(rm)) {
      if (sz == 4)
         putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
      else
         putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));

      DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
      return 1+delta0;
   } else {
      addr = disAMode ( &len, sorb, delta0, dis_buf );
      storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
      DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
      return len+delta0;
   }
}
static
void dis_push_segreg ( UInt sreg, Int sz )
{
   IRTemp t1 = newTemp(Ity_I16);
   IRTemp ta = newTemp(Ity_I32);
   vassert(sz == 2 || sz == 4);

   assign( t1, getSReg(sreg) );
   assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
   putIReg(4, R_ESP, mkexpr(ta));
   storeLE( mkexpr(ta), mkexpr(t1) );

   DIP("push%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg));
}
static
void dis_pop_segreg ( UInt sreg, Int sz )
{
   IRTemp t1 = newTemp(Ity_I16);
   IRTemp ta = newTemp(Ity_I32);
   vassert(sz == 2 || sz == 4);

   assign( ta, getIReg(4, R_ESP) );
   assign( t1, loadLE(Ity_I16, mkexpr(ta)) );

   putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
   putSReg( sreg, mkexpr(t1) );
   DIP("pop%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg));
}
static
void dis_ret ( /*MOD*/DisResult* dres, UInt d32 )
{
   IRTemp t1 = newTemp(Ity_I32);
   IRTemp t2 = newTemp(Ity_I32);
   assign(t1, getIReg(4,R_ESP));
   assign(t2, loadLE(Ity_I32,mkexpr(t1)));
   putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(4+d32)));
   jmp_treg(dres, Ijk_Ret, t2);
   vassert(dres->whatNext == Dis_StopHere);
}
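/* In other words, "ret $d32" is modelled as (an informal sketch of
   the generated IR, not decoder code):

      t1   = %esp;
      t2   = *t1;            // the return address
      %esp = t1 + 4 + d32;   // pop EIP plus the extra imm16 bytes
      goto t2;               // Ijk_Ret
*/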
/*------------------------------------------------------------*/
/*--- SSE/SSE2/SSE3 helpers                                 ---*/
/*------------------------------------------------------------*/

/* Indicates whether the op requires a rounding-mode argument.  Note
   that this covers only vector floating point arithmetic ops, and
   omits the scalar ones that need rounding modes.  Note also that
   inconsistencies here will get picked up later by the IR sanity
   checker, so this isn't correctness-critical. */
static Bool requiresRMode ( IROp op )
{
   switch (op) {
      case Iop_Add32Fx4: case Iop_Sub32Fx4:
      case Iop_Mul32Fx4: case Iop_Div32Fx4:
      case Iop_Add64Fx2: case Iop_Sub64Fx2:
      case Iop_Mul64Fx2: case Iop_Div64Fx2:
         return True;
      default:
         break;
   }
   return False;
}
/* Worker function; do not call directly.
   Handles full width G = G `op` E   and   G = (not G) `op` E.
*/

static UInt dis_SSE_E_to_G_all_wrk (
               UChar sorb, Int delta,
               const HChar* opname, IROp op,
               Bool  invertG
            )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getIByte(delta);
   IRExpr* gpart
      = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRM(rm)))
                : getXMMReg(gregOfRM(rm));
   if (epartIsReg(rm)) {
      putXMMReg(
         gregOfRM(rm),
         requiresRMode(op)
            ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        gpart,
                        getXMMReg(eregOfRM(rm)))
            : binop(op, gpart,
                        getXMMReg(eregOfRM(rm)))
      );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      putXMMReg(
         gregOfRM(rm),
         requiresRMode(op)
            ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        gpart,
                        loadLE(Ity_V128, mkexpr(addr)))
            : binop(op, gpart,
                        loadLE(Ity_V128, mkexpr(addr)))
      );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}
/* All lanes SSE binary operation, G = G `op` E. */

static
UInt dis_SSE_E_to_G_all ( UChar sorb, Int delta, const HChar* opname, IROp op )
{
   return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, False );
}

/* All lanes SSE binary operation, G = (not G) `op` E. */

static
UInt dis_SSE_E_to_G_all_invG ( UChar sorb, Int delta,
                               const HChar* opname, IROp op )
{
   return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, True );
}
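/* The invG variant exists for the ANDN-style insns, whose semantics
   are G = (not G) `op` E.  A typical use, as the ANDNPS decode further
   below does (a usage sketch):

      delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnps",
                                       Iop_AndV128 );
*/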
/* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */

static UInt dis_SSE_E_to_G_lo32 ( UChar sorb, Int delta,
                                  const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm    = getIByte(delta);
   IRExpr* gpart = getXMMReg(gregOfRM(rm));
   if (epartIsReg(rm)) {
      putXMMReg( gregOfRM(rm),
                 binop(op, gpart,
                           getXMMReg(eregOfRM(rm))) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      /* We can only do a 32-bit memory read, so the upper 3/4 of the
         E operand needs to be made simply of zeroes. */
      IRTemp epart = newTemp(Ity_V128);
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( epart, unop( Iop_32UtoV128,
                           loadLE(Ity_I32, mkexpr(addr))) );
      putXMMReg( gregOfRM(rm),
                 binop(op, gpart, mkexpr(epart)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}
/* Lower 64-bit lane only SSE binary operation, G = G `op` E. */

static UInt dis_SSE_E_to_G_lo64 ( UChar sorb, Int delta,
                                  const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm    = getIByte(delta);
   IRExpr* gpart = getXMMReg(gregOfRM(rm));
   if (epartIsReg(rm)) {
      putXMMReg( gregOfRM(rm),
                 binop(op, gpart,
                           getXMMReg(eregOfRM(rm))) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      /* We can only do a 64-bit memory read, so the upper half of the
         E operand needs to be made simply of zeroes. */
      IRTemp epart = newTemp(Ity_V128);
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( epart, unop( Iop_64UtoV128,
                           loadLE(Ity_I64, mkexpr(addr))) );
      putXMMReg( gregOfRM(rm),
                 binop(op, gpart, mkexpr(epart)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}
/* All lanes unary SSE operation, G = op(E). */

static UInt dis_SSE_E_to_G_unary_all (
               UChar sorb, Int delta,
               const HChar* opname, IROp op
            )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getIByte(delta);
   // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
   // up in the usual way.
   Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
   if (epartIsReg(rm)) {
      IRExpr* src = getXMMReg(eregOfRM(rm));
      /* XXXROUNDINGFIXME */
      IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
                              : unop(op, src);
      putXMMReg( gregOfRM(rm), res );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      IRExpr* src = loadLE(Ity_V128, mkexpr(addr));
      /* XXXROUNDINGFIXME */
      IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
                              : unop(op, src);
      putXMMReg( gregOfRM(rm), res );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}
/* Lowest 32-bit lane only unary SSE operation, G = op(E). */

static UInt dis_SSE_E_to_G_unary_lo32 (
               UChar sorb, Int delta,
               const HChar* opname, IROp op
            )
{
   /* First we need to get the old G value and patch the low 32 bits
      of the E operand into it.  Then apply op and write back to G. */
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm    = getIByte(delta);
   IRTemp  oldG0 = newTemp(Ity_V128);
   IRTemp  oldG1 = newTemp(Ity_V128);

   assign( oldG0, getXMMReg(gregOfRM(rm)) );

   if (epartIsReg(rm)) {
      assign( oldG1,
              binop( Iop_SetV128lo32,
                     mkexpr(oldG0),
                     getXMMRegLane32(eregOfRM(rm), 0)) );
      putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( oldG1,
              binop( Iop_SetV128lo32,
                     mkexpr(oldG0),
                     loadLE(Ity_I32, mkexpr(addr)) ));
      putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}
/* Lowest 64-bit lane only unary SSE operation, G = op(E). */

static UInt dis_SSE_E_to_G_unary_lo64 (
               UChar sorb, Int delta,
               const HChar* opname, IROp op
            )
{
   /* First we need to get the old G value and patch the low 64 bits
      of the E operand into it.  Then apply op and write back to G. */
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm    = getIByte(delta);
   IRTemp  oldG0 = newTemp(Ity_V128);
   IRTemp  oldG1 = newTemp(Ity_V128);

   assign( oldG0, getXMMReg(gregOfRM(rm)) );

   if (epartIsReg(rm)) {
      assign( oldG1,
              binop( Iop_SetV128lo64,
                     mkexpr(oldG0),
                     getXMMRegLane64(eregOfRM(rm), 0)) );
      putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      return delta+1;
   } else {
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( oldG1,
              binop( Iop_SetV128lo64,
                     mkexpr(oldG0),
                     loadLE(Ity_I64, mkexpr(addr)) ));
      putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      return delta+alen;
   }
}
/* SSE integer binary operation:
      G = G `op` E   (eLeft == False)
      G = E `op` G   (eLeft == True)
*/
static UInt dis_SSEint_E_to_G(
               UChar sorb, Int delta,
               const HChar* opname, IROp op,
               Bool  eLeft
            )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm    = getIByte(delta);
   IRExpr* gpart = getXMMReg(gregOfRM(rm));
   IRExpr* epart = NULL;
   if (epartIsReg(rm)) {
      epart = getXMMReg(eregOfRM(rm));
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      delta += 1;
   } else {
      addr  = disAMode ( &alen, sorb, delta, dis_buf );
      epart = loadLE(Ity_V128, mkexpr(addr));
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      delta += alen;
   }
   putXMMReg( gregOfRM(rm),
              eLeft ? binop(op, epart, gpart)
                    : binop(op, gpart, epart) );
   return delta;
}
/* Helper for doing SSE FP comparisons. */

static void findSSECmpOp ( Bool* needNot, IROp* op,
                           Int imm8, Bool all_lanes, Int sz )
{
   imm8 &= 7;
   *needNot = False;
   *op      = Iop_INVALID;
   if (imm8 >= 4) {
      *needNot = True;
      imm8 -= 4;
   }

   if (sz == 4 && all_lanes) {
      switch (imm8) {
         case 0: *op = Iop_CmpEQ32Fx4; return;
         case 1: *op = Iop_CmpLT32Fx4; return;
         case 2: *op = Iop_CmpLE32Fx4; return;
         case 3: *op = Iop_CmpUN32Fx4; return;
         default: break;
      }
   }
   if (sz == 4 && !all_lanes) {
      switch (imm8) {
         case 0: *op = Iop_CmpEQ32F0x4; return;
         case 1: *op = Iop_CmpLT32F0x4; return;
         case 2: *op = Iop_CmpLE32F0x4; return;
         case 3: *op = Iop_CmpUN32F0x4; return;
         default: break;
      }
   }
   if (sz == 8 && all_lanes) {
      switch (imm8) {
         case 0: *op = Iop_CmpEQ64Fx2; return;
         case 1: *op = Iop_CmpLT64Fx2; return;
         case 2: *op = Iop_CmpLE64Fx2; return;
         case 3: *op = Iop_CmpUN64Fx2; return;
         default: break;
      }
   }
   if (sz == 8 && !all_lanes) {
      switch (imm8) {
         case 0: *op = Iop_CmpEQ64F0x2; return;
         case 1: *op = Iop_CmpLT64F0x2; return;
         case 2: *op = Iop_CmpLE64F0x2; return;
         case 3: *op = Iop_CmpUN64F0x2; return;
         default: break;
      }
   }
   vpanic("findSSECmpOp(x86,guest)");
}
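/* Only the Intel-defined imm8 values 0..7 are meaningful here:
   0=EQ, 1=LT, 2=LE, 3=UNORD, 4=NEQ, 5=NLT, 6=NLE, 7=ORD.  Since 4..7
   are exactly the negations of 0..3, imm8 >= 4 just sets *needNot and
   is folded back into the 0..3 range; the caller then applies a
   NotV128 (or, for the single-lane variants, an XOR against a
   low-lane mask) to the plain comparison result. */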
/* Handles SSE 32F/64F comparisons. */

static UInt dis_SSEcmp_E_to_G ( UChar sorb, Int delta,
                                const HChar* opname, Bool all_lanes, Int sz )
{
   HChar   dis_buf[50];
   Int     alen, imm8;
   IRTemp  addr;
   Bool    needNot = False;
   IROp    op      = Iop_INVALID;
   IRTemp  plain   = newTemp(Ity_V128);
   UChar   rm      = getIByte(delta);
   UShort  mask    = 0;
   vassert(sz == 4 || sz == 8);
   if (epartIsReg(rm)) {
      imm8 = getIByte(delta+1);
      findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
      assign( plain, binop(op, getXMMReg(gregOfRM(rm)),
                               getXMMReg(eregOfRM(rm))) );
      delta += 2;
      DIP("%s $%d,%s,%s\n", opname,
                            imm8,
                            nameXMMReg(eregOfRM(rm)),
                            nameXMMReg(gregOfRM(rm)) );
   } else {
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      imm8 = getIByte(delta+alen);
      findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
      assign( plain,
              binop(
                 op,
                 getXMMReg(gregOfRM(rm)),
                   all_lanes ? loadLE(Ity_V128, mkexpr(addr))
                 : sz == 8   ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
                 : /*sz==4*/   unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
              )
      );
      delta += alen+1;
      DIP("%s $%d,%s,%s\n", opname,
                            imm8,
                            dis_buf,
                            nameXMMReg(gregOfRM(rm)) );
   }

   if (needNot && all_lanes) {
      putXMMReg( gregOfRM(rm),
                 unop(Iop_NotV128, mkexpr(plain)) );
   }
   else
   if (needNot && !all_lanes) {
      mask = toUShort( sz==4 ? 0x000F : 0x00FF );
      putXMMReg( gregOfRM(rm),
                 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
   }
   else {
      putXMMReg( gregOfRM(rm), mkexpr(plain) );
   }

   return delta;
}
/* Vector by scalar shift of G by the amount specified at the bottom
   of E. */

static UInt dis_SSE_shiftG_byE ( UChar sorb, Int delta,
                                 const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen, size;
   IRTemp  addr;
   Bool    shl, shr, sar;
   UChar   rm   = getIByte(delta);
   IRTemp  g0   = newTemp(Ity_V128);
   IRTemp  g1   = newTemp(Ity_V128);
   IRTemp  amt  = newTemp(Ity_I32);
   IRTemp  amt8 = newTemp(Ity_I8);
   if (epartIsReg(rm)) {
      assign( amt, getXMMRegLane32(eregOfRM(rm), 0) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRM(rm)),
                        nameXMMReg(gregOfRM(rm)) );
      delta++;
   } else {
      addr = disAMode ( &alen, sorb, delta, dis_buf );
      assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRM(rm)) );
      delta += alen;
   }
   assign( g0,   getXMMReg(gregOfRM(rm)) );
   assign( amt8, unop(Iop_32to8, mkexpr(amt)) );

   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x8: shl = True; size = 32; break;
      case Iop_ShlN32x4: shl = True; size = 32; break;
      case Iop_ShlN64x2: shl = True; size = 64; break;
      case Iop_SarN16x8: sar = True; size = 16; break;
      case Iop_SarN32x4: sar = True; size = 32; break;
      case Iop_ShrN16x8: shr = True; size = 16; break;
      case Iop_ShrN32x4: shr = True; size = 32; break;
      case Iop_ShrN64x2: shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
      assign(
         g1,
         IRExpr_ITE(
            binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
            binop(op, mkexpr(g0), mkexpr(amt8)),
            mkV128(0x0000)
         )
      );
   } else
   if (sar) {
      assign(
         g1,
         IRExpr_ITE(
            binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size)),
            binop(op, mkexpr(g0), mkexpr(amt8)),
            binop(op, mkexpr(g0), mkU8(size-1))
         )
      );
   } else {
      /*NOTREACHED*/
      vassert(0);
   }

   putXMMReg( gregOfRM(rm), mkexpr(g1) );
   return delta;
}
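/* The ITE above implements the architectural out-of-range rule: for
   logical shifts, a count >= the lane width yields all zeroes, while
   for arithmetic right shifts an out-of-range count behaves as a
   shift by (lane width - 1), i.e. it replicates the sign bit into
   every bit.  For example, for psraw a count of 200 gives the same
   result as a count of 15. */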
/* Vector by scalar shift of E by an immediate byte. */

static
UInt dis_SSE_shiftE_imm ( Int delta, const HChar* opname, IROp op )
{
   Bool    shl, shr, sar;
   UChar   rm = getIByte(delta);
   IRTemp  e0 = newTemp(Ity_V128);
   IRTemp  e1 = newTemp(Ity_V128);
   UChar   amt, size;
   vassert(epartIsReg(rm));
   vassert(gregOfRM(rm) == 2
           || gregOfRM(rm) == 4 || gregOfRM(rm) == 6);
   amt = getIByte(delta+1);
   delta += 2;
   DIP("%s $%d,%s\n", opname,
                      (Int)amt,
                      nameXMMReg(eregOfRM(rm)) );
   assign( e0, getXMMReg(eregOfRM(rm)) );

   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x8: shl = True; size = 16; break;
      case Iop_ShlN32x4: shl = True; size = 32; break;
      case Iop_ShlN64x2: shl = True; size = 64; break;
      case Iop_SarN16x8: sar = True; size = 16; break;
      case Iop_SarN32x4: sar = True; size = 32; break;
      case Iop_ShrN16x8: shr = True; size = 16; break;
      case Iop_ShrN32x4: shr = True; size = 32; break;
      case Iop_ShrN64x2: shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
      assign( e1, amt >= size
                     ? mkV128(0x0000)
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else
   if (sar) {
      assign( e1, amt >= size
                     ? binop(op, mkexpr(e0), mkU8(size-1))
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else {
      /*NOTREACHED*/
      vassert(0);
   }

   putXMMReg( eregOfRM(rm), mkexpr(e1) );
   return delta;
}
/* Get the current SSE rounding mode. */

static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
{
   return binop( Iop_And32,
                 IRExpr_Get( OFFB_SSEROUND, Ity_I32 ),
                 mkU32(3) );
}

static void put_sse_roundingmode ( IRExpr* sseround )
{
   vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
   stmt( IRStmt_Put( OFFB_SSEROUND, sseround ) );
}
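/* Only the bottom two bits of the guest SSEROUND word are meaningful,
   hence the And32 with 3 above; the result is then usable directly as
   an IRRoundingMode (Irrm_NEAREST=0, Irrm_NegINF=1, Irrm_PosINF=2,
   Irrm_ZERO=3).  Typical use (a sketch): feed it as the rounding-mode
   argument of Iop_F64toI32S, as the CVTPS2PI decode below does. */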
/* Break a 128-bit value up into four 32-bit ints. */

static void breakup128to32s ( IRTemp t128,
                              /*OUTs*/
                              IRTemp* t3, IRTemp* t2,
                              IRTemp* t1, IRTemp* t0 )
{
   IRTemp hi64 = newTemp(Ity_I64);
   IRTemp lo64 = newTemp(Ity_I64);
   assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
   assign( lo64, unop(Iop_V128to64,   mkexpr(t128)) );

   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);

   *t0 = newTemp(Ity_I32);
   *t1 = newTemp(Ity_I32);
   *t2 = newTemp(Ity_I32);
   *t3 = newTemp(Ity_I32);
   assign( *t0, unop(Iop_64to32,   mkexpr(lo64)) );
   assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
   assign( *t2, unop(Iop_64to32,   mkexpr(hi64)) );
   assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
}

/* Construct a 128-bit value from four 32-bit ints. */

static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2,
                              IRTemp t1, IRTemp t0 )
{
   return
      binop( Iop_64HLtoV128,
             binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
             binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
      );
}
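/* breakup128to32s and mk128from32s are inverses: for any V128 temp t,
   breaking it into (t3,t2,t1,t0) and rebuilding with
   mk128from32s(t3,t2,t1,t0) yields the original value, with t3 the
   most significant 32 bits and t0 the least.  The 64-bit pair below
   follows the same pattern at 16-bit granularity. */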
/* Break a 64-bit value up into four 16-bit ints. */

static void breakup64to16s ( IRTemp t64,
                             /*OUTs*/
                             IRTemp* t3, IRTemp* t2,
                             IRTemp* t1, IRTemp* t0 )
{
   IRTemp hi32 = newTemp(Ity_I32);
   IRTemp lo32 = newTemp(Ity_I32);
   assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
   assign( lo32, unop(Iop_64to32,   mkexpr(t64)) );

   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);

   *t0 = newTemp(Ity_I16);
   *t1 = newTemp(Ity_I16);
   *t2 = newTemp(Ity_I16);
   *t3 = newTemp(Ity_I16);
   assign( *t0, unop(Iop_32to16,   mkexpr(lo32)) );
   assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
   assign( *t2, unop(Iop_32to16,   mkexpr(hi32)) );
   assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
}

/* Construct a 64-bit value from four 16-bit ints. */

static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
                             IRTemp t1, IRTemp t0 )
{
   return
      binop( Iop_32HLto64,
             binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
             binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
      );
}
/* Generate IR to set the guest %EFLAGS from the pushfl-format image
   in the given 32-bit temporary.  The flags that are set are: O S Z A
   C P D ID AC.

   In all cases, code to set AC is generated.  However, VEX actually
   ignores the AC value and so can optionally emit an emulation
   warning when it is enabled.  In this routine, an emulation warning
   is only emitted if emit_AC_emwarn is True, in which case
   next_insn_EIP must be correct (this allows for correct code
   generation for popfl/popfw).  If emit_AC_emwarn is False,
   next_insn_EIP is unimportant (this allows for easy if kludgey code
   generation for IRET.) */

static
void set_EFLAGS_from_value ( IRTemp t1,
                             Bool   emit_AC_emwarn,
                             Addr32 next_insn_EIP )
{
   vassert(typeOfIRTemp(irsb->tyenv,t1) == Ity_I32);

   /* t1 is the flag word.  Mask out everything except OSZACP and set
      the flags thunk to X86G_CC_OP_COPY. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     binop(Iop_And32,
                           mkexpr(t1),
                           mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
                                  | X86G_CC_MASK_A | X86G_CC_MASK_Z
                                  | X86G_CC_MASK_S | X86G_CC_MASK_O )
                          )
                    )
       );
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

   /* Also need to set the D flag, which is held in bit 10 of t1.
      If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
   stmt( IRStmt_Put(
            OFFB_DFLAG,
            IRExpr_ITE(
               unop(Iop_32to1,
                    binop(Iop_And32,
                          binop(Iop_Shr32, mkexpr(t1), mkU8(10)),
                          mkU32(1))),
               mkU32(0xFFFFFFFF),
               mkU32(1)))
       );

   /* Set the ID flag */
   stmt( IRStmt_Put(
            OFFB_IDFLAG,
            IRExpr_ITE(
               unop(Iop_32to1,
                    binop(Iop_And32,
                          binop(Iop_Shr32, mkexpr(t1), mkU8(21)),
                          mkU32(1))),
               mkU32(1),
               mkU32(0)))
       );

   /* And set the AC flag.  If setting it to 1, possibly emit an
      emulation warning. */
   stmt( IRStmt_Put(
            OFFB_ACFLAG,
            IRExpr_ITE(
               unop(Iop_32to1,
                    binop(Iop_And32,
                          binop(Iop_Shr32, mkexpr(t1), mkU8(18)),
                          mkU32(1))),
               mkU32(1),
               mkU32(0)))
       );

   if (emit_AC_emwarn) {
      put_emwarn( mkU32(EmWarn_X86_acFlag) );
      stmt(
         IRStmt_Exit(
            binop( Iop_CmpNE32,
                   binop(Iop_And32, mkexpr(t1), mkU32(1<<18)),
                   mkU32(0) ),
            Ijk_EmWarn,
            IRConst_U32( next_insn_EIP ),
            OFFB_EIP
         )
      );
   }
}
/* Helper for the SSSE3 (not SSE3) PMULHRSW insns.  Given two 64-bit
   values (aa,bb), computes, for each of the 4 16-bit lanes:

   (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
*/
static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
{
   IRTemp aa      = newTemp(Ity_I64);
   IRTemp bb      = newTemp(Ity_I64);
   IRTemp aahi32s = newTemp(Ity_I64);
   IRTemp aalo32s = newTemp(Ity_I64);
   IRTemp bbhi32s = newTemp(Ity_I64);
   IRTemp bblo32s = newTemp(Ity_I64);
   IRTemp rHi     = newTemp(Ity_I64);
   IRTemp rLo     = newTemp(Ity_I64);
   IRTemp one32x2 = newTemp(Ity_I64);
   assign(aa, aax);
   assign(bb, bbx);
   assign( aahi32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
                 mkU8(16) ));
   assign( aalo32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
                 mkU8(16) ));
   assign( bbhi32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
                 mkU8(16) ));
   assign( bblo32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
                 mkU8(16) ));
   assign(one32x2, mkU64( (1ULL << 32) + 1 ));
   assign(
      rHi,
      binop(
         Iop_ShrN32x2,
         binop(
            Iop_Add32x2,
            binop(
               Iop_ShrN32x2,
               binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
               mkU8(14)
            ),
            mkexpr(one32x2)
         ),
         mkU8(1)
      )
   );
   assign(
      rLo,
      binop(
         Iop_ShrN32x2,
         binop(
            Iop_Add32x2,
            binop(
               Iop_ShrN32x2,
               binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
               mkU8(14)
            ),
            mkexpr(one32x2)
         ),
         mkU8(1)
      )
   );
   return
      binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
}
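/* Worked example for one 16-bit lane (a sketch): with aa_lane =
   0x4000 (16384) and bb_lane = 0x2000 (8192), the signed 32-bit
   product is 0x08000000; >>u 14 gives 0x2000; +1 then >>u 1 gives
   0x1000 (4096), i.e. round-to-nearest of (a*b) / 2^15.  The
   interleave/sar pairs above are just a way of sign-extending each
   16-bit lane to 32 bits so that the Mul32x2 cannot overflow. */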
/* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns.  Given two 64-bit
   values (aa,bb), computes, for each lane:

          if aa_lane < 0 then - bb_lane
     else if aa_lane > 0 then bb_lane
     else 0
*/
static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
{
   IRTemp aa       = newTemp(Ity_I64);
   IRTemp bb       = newTemp(Ity_I64);
   IRTemp zero     = newTemp(Ity_I64);
   IRTemp bbNeg    = newTemp(Ity_I64);
   IRTemp negMask  = newTemp(Ity_I64);
   IRTemp posMask  = newTemp(Ity_I64);
   IROp   opSub    = Iop_INVALID;
   IROp   opCmpGTS = Iop_INVALID;

   switch (laneszB) {
      case 1: opSub = Iop_Sub8x8;  opCmpGTS = Iop_CmpGT8Sx8;  break;
      case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
      case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
      default: vassert(0);
   }

   assign( aa,      aax );
   assign( bb,      bbx );
   assign( zero,    mkU64(0) );
   assign( bbNeg,   binop(opSub,    mkexpr(zero), mkexpr(bb)) );
   assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
   assign( posMask, binop(opCmpGTS, mkexpr(aa),   mkexpr(zero)) );

   return
      binop(Iop_Or64,
            binop(Iop_And64, mkexpr(bb),    mkexpr(posMask)),
            binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
}
/* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns.  Given a 64-bit
   value aa, computes, for each lane

   if aa < 0 then -aa else aa

   Note that the result is interpreted as unsigned, so that the
   absolute value of the most negative signed input can be
   represented.
*/
static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB )
{
   IRTemp aa      = newTemp(Ity_I64);
   IRTemp zero    = newTemp(Ity_I64);
   IRTemp aaNeg   = newTemp(Ity_I64);
   IRTemp negMask = newTemp(Ity_I64);
   IRTemp posMask = newTemp(Ity_I64);
   IROp   opSub   = Iop_INVALID;
   IROp   opSarN  = Iop_INVALID;

   switch (laneszB) {
      case 1: opSub = Iop_Sub8x8;  opSarN = Iop_SarN8x8;  break;
      case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
      case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
      default: vassert(0);
   }

   assign( aa,      aax );
   assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
   assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
   assign( zero,    mkU64(0) );
   assign( aaNeg,   binop(opSub, mkexpr(zero), mkexpr(aa)) );
   return
      binop(Iop_Or64,
            binop(Iop_And64, mkexpr(aa),    mkexpr(posMask)),
            binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) );
}
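/* The mask trick above computes abs without branches: negMask is all
   ones in lanes where aa is negative (the arithmetic shift replicates
   the sign bit across the lane), posMask is its complement, and the
   result ORs together (aa & posMask) and ((0 - aa) & negMask).  For
   the most negative input, 0 - aa wraps back to aa itself, which is
   the correct unsigned interpretation noted in the comment above. */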
static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
                                        IRTemp lo64, Int byteShift )
{
   vassert(byteShift >= 1 && byteShift <= 7);
   return
      binop(Iop_Or64,
            binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
            binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
      );
}
/* Generate a SIGSEGV followed by a restart of the current instruction
   if effective_addr is not 16-aligned.  This is required behaviour
   for some SSE3 instructions and all 128-bit SSSE3 instructions.
   This assumes that guest_RIP_curr_instr is set correctly! */
static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr )
{
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE32,
               binop(Iop_And32,mkexpr(effective_addr),mkU32(0xF)),
               mkU32(0)),
         Ijk_SigSEGV,
         IRConst_U32(guest_EIP_curr_instr),
         OFFB_EIP
      )
   );
}
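/* I.e. the check is simply (addr & 0xF) != 0 -- a 16-byte-aligned
   address has its low four bits clear.  The side exit delivers
   Ijk_SigSEGV with EIP wound back to the faulting instruction, so the
   guest observes the fault exactly as real hardware would. */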
/* Helper for deciding whether a given insn (starting at the opcode
   byte) may validly be used with a LOCK prefix.  The following insns
   may be used with LOCK when their destination operand is in memory.
   AFAICS this is exactly the same for both 32-bit and 64-bit mode.

   ADD        80 /0,  81 /0,  82 /0,  83 /0,  00,  01
   OR         80 /1,  81 /1,  82 /x,  83 /1,  08,  09
   ADC        80 /2,  81 /2,  82 /2,  83 /2,  10,  11
   SBB        81 /3,  81 /3,  82 /x,  83 /3,  18,  19
   AND        80 /4,  81 /4,  82 /x,  83 /4,  20,  21
   SUB        80 /5,  81 /5,  82 /x,  83 /5,  28,  29
   XOR        80 /6,  81 /6,  82 /x,  83 /6,  30,  31

   DEC        FE /1,  FF /1
   INC        FE /0,  FF /0

   NEG        F6 /3,  F7 /3
   NOT        F6 /2,  F7 /2

   XCHG       86, 87

   BTC        0F BB,  0F BA /7
   BTR        0F B3,  0F BA /6
   BTS        0F AB,  0F BA /5

   CMPXCHG    0F B0,  0F B1
   CMPXCHG8B  0F C7 /1

   XADD       0F C0,  0F C1

   ------------------------------

   80 /0  =  addb $imm8,  rm8
   81 /0  =  addl $imm32, rm32  and  addw $imm16, rm16
   82 /0  =  addb $imm8,  rm8
   83 /0  =  addl $simm8, rm32  and  addw $simm8, rm16

   00     =  addb r8,  rm8
   01     =  addl r32, rm32  and  addw r16, rm16

   Same for ADD OR ADC SBB AND SUB XOR

   FE /1  =  dec rm8
   FF /1  =  dec rm32  and  dec rm16

   FE /0  =  inc rm8
   FF /0  =  inc rm32  and  inc rm16

   F6 /3  =  neg rm8
   F7 /3  =  neg rm32  and  neg rm16

   F6 /2  =  not rm8
   F7 /2  =  not rm32  and  not rm16

   0F BB     =  btcw r16, rm16    and  btcl r32, rm32
   0F BA /7  =  btcw $imm8, rm16  and  btcl $imm8, rm32

   Same for BTS, BTR
*/
static Bool can_be_used_with_LOCK_prefix ( const UChar* opc )
{
   switch (opc[0]) {
      case 0x00: case 0x01: case 0x08: case 0x09:
      case 0x10: case 0x11: case 0x18: case 0x19:
      case 0x20: case 0x21: case 0x28: case 0x29:
      case 0x30: case 0x31:
         if (!epartIsReg(opc[1]))
            return True;
         break;

      case 0x80: case 0x81: case 0x82: case 0x83:
         if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6
             && !epartIsReg(opc[1]))
            return True;
         break;

      case 0xFE: case 0xFF:
         if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1
             && !epartIsReg(opc[1]))
            return True;
         break;

      case 0xF6: case 0xF7:
         if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3
             && !epartIsReg(opc[1]))
            return True;
         break;

      case 0x86: case 0x87:
         if (!epartIsReg(opc[1]))
            return True;
         break;

      case 0x0F: {
         switch (opc[1]) {
            case 0xBB: case 0xB3: case 0xAB:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            case 0xBA:
               if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7
                   && !epartIsReg(opc[2]))
                  return True;
               break;
            case 0xB0: case 0xB1:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            case 0xC7:
               if (gregOfRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
                  return True;
               break;
            case 0xC0: case 0xC1:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            default:
               break;
         } /* switch (opc[1]) */
         break;
      }

      default:
         break;
   } /* switch (opc[0]) */

   return False;
}
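/* Example: "lock addl $1, (%eax)" arrives here (LOCK prefix already
   stripped) as opc[0] == 0x81 with gregOfRM(opc[1]) == 0 (the /0 ADD
   subopcode) and a memory E-part, so it is accepted.  By contrast
   "lock addl $1, %ebx" has a register E-part and is rejected,
   matching the #UD behaviour of real hardware. */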
static IRTemp math_BSWAP ( IRTemp t1, IRType ty )
{
   IRTemp t2 = newTemp(ty);
   if (ty == Ity_I32) {
      assign( t2,
         binop(
            Iop_Or32,
            binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
            binop(
               Iop_Or32,
               binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
                                mkU32(0x00FF0000)),
               binop(Iop_Or32,
                     binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
                                      mkU32(0x0000FF00)),
                     binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
                                      mkU32(0x000000FF) )
               )
            )
         )
      );
      return t2;
   }
   if (ty == Ity_I16) {
      assign(t2,
             binop(Iop_Or16,
                   binop(Iop_Shl16, mkexpr(t1), mkU8(8)),
                   binop(Iop_Shr16, mkexpr(t1), mkU8(8)) ));
      return t2;
   }
   vassert(0);
   /*NOTREACHED*/
   return IRTemp_INVALID;
}
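/* Example, 32-bit case (a sketch): t1 = 0x11223344 gives
      (t1 << 24)              = 0x44000000
      (t1 <<  8) & 0x00FF0000 = 0x00330000
      (t1 >>  8) & 0x0000FF00 = 0x00002200
      (t1 >> 24) & 0x000000FF = 0x00000011
   OR-ed together: 0x44332211, i.e. the byte-reversed value. */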
/*------------------------------------------------------------*/
/*--- Disassemble a single instruction                      ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction is
   located in host memory at &guest_code[delta].  *expect_CAS is set
   to True if the resulting IR is expected to contain an IRCAS
   statement, and False if it's not expected to.  This makes it
   possible for the caller of disInstr_X86_WRK to check that
   LOCK-prefixed instructions are at least plausibly translated, in
   that it becomes possible to check that a (validly) LOCK-prefixed
   instruction generates a translation containing an IRCAS, and
   instructions without LOCK prefixes don't generate translations
   containing an IRCAS.
*/
static
DisResult disInstr_X86_WRK (
             /*OUT*/Bool* expect_CAS,
             Long         delta64,
             const VexArchInfo* archinfo,
             const VexAbiInfo*  vbi,
             Bool         sigill_diag
          )
{
   IRType    ty;
   IRTemp    addr, t0, t1, t2, t3, t4, t5, t6;
   Int       alen;
   UChar     opc, modrm, abyte, pre;
   UInt      d32;
   HChar     dis_buf[50];
   Int       am_sz, d_sz, n_prefixes;
   DisResult dres;
   const UChar* insn; /* used in SSE decoders */

   /* The running delta */
   Int delta = (Int)delta64;

   /* Holds eip at the start of the insn, so that we can print
      consistent error messages for unimplemented insns. */
   Int delta_start = delta;

   /* sz denotes the nominal data-op size of the insn; we change it to
      2 if an 0x66 prefix is seen */
   Int sz = 4;

   /* sorb holds the segment-override-prefix byte, if any.  Zero if no
      prefix has been seen, else one of {0x26, 0x36, 0x3E, 0x64, 0x65}
      indicating the prefix.  */
   UChar sorb = 0;

   /* Gets set to True if a LOCK prefix is seen. */
   Bool pfx_lock = False;

   /* Set result defaults. */
   dres.whatNext    = Dis_Continue;
   dres.len         = 0;
   dres.hint        = Dis_HintNone;
   dres.jk_StopHere = Ijk_INVALID;

   *expect_CAS = False;

   addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID;

   vassert(guest_EIP_bbstart + delta == guest_EIP_curr_instr);
   DIP("\t0x%x:  ", guest_EIP_bbstart+delta);
   /* Spot "Special" instructions (see comment at top of file). */
   {
      const UChar* code = guest_code + delta;
      /* Spot the 12-byte preamble:
         C1C703   roll $3,  %edi
         C1C70D   roll $13, %edi
         C1C71D   roll $29, %edi
         C1C713   roll $19, %edi
      */
      if (code[ 0] == 0xC1 && code[ 1] == 0xC7 && code[ 2] == 0x03 &&
          code[ 3] == 0xC1 && code[ 4] == 0xC7 && code[ 5] == 0x0D &&
          code[ 6] == 0xC1 && code[ 7] == 0xC7 && code[ 8] == 0x1D &&
          code[ 9] == 0xC1 && code[10] == 0xC7 && code[11] == 0x13) {
         /* Got a "Special" instruction preamble.  Which one is it? */
         if (code[12] == 0x87 && code[13] == 0xDB /* xchgl %ebx,%ebx */) {
            /* %EDX = client_request ( %EAX ) */
            DIP("%%edx = client_request ( %%eax )\n");
            delta += 14;
            jmp_lit(&dres, Ijk_ClientReq, guest_EIP_bbstart+delta);
            vassert(dres.whatNext == Dis_StopHere);
            goto decode_success;
         }
         else
         if (code[12] == 0x87 && code[13] == 0xC9 /* xchgl %ecx,%ecx */) {
            /* %EAX = guest_NRADDR */
            DIP("%%eax = guest_NRADDR\n");
            delta += 14;
            putIReg(4, R_EAX, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
            goto decode_success;
         }
         else
         if (code[12] == 0x87 && code[13] == 0xD2 /* xchgl %edx,%edx */) {
            /* call-noredir *%EAX */
            DIP("call-noredir *%%eax\n");
            delta += 14;
            t1 = newTemp(Ity_I32);
            assign(t1, getIReg(4,R_EAX));
            t2 = newTemp(Ity_I32);
            assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
            putIReg(4, R_ESP, mkexpr(t2));
            storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta));
            jmp_treg(&dres, Ijk_NoRedir, t1);
            vassert(dres.whatNext == Dis_StopHere);
            goto decode_success;
         }
         else
         if (code[12] == 0x87 && code[13] == 0xFF /* xchgl %edi,%edi */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);

            // Invalidate the current insn. The reason is that the IRop we're
            // injecting here can change. In which case the translation has to
            // be redone. For ease of handling, we simply invalidate all the
            // time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_EIP_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU32(14)));

            delta += 14;

            stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_bbstart + delta) ) );
            dres.whatNext    = Dis_StopHere;
            dres.jk_StopHere = Ijk_InvalICache;
            goto decode_success;
         }
         /* We don't know what it is. */
         goto decode_failure;
         /*NOTREACHED*/
      }
   }
   /* Handle a couple of weird-ass NOPs that have been observed in the
      wild. */
   {
      const UChar* code = guest_code + delta;
      /* Sun's JVM 1.5.0 uses the following as a NOP:
         26 2E 64 65 90  %es:%cs:%fs:%gs:nop */
      if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64
          && code[3] == 0x65 && code[4] == 0x90) {
         DIP("%%es:%%cs:%%fs:%%gs:nop\n");
         delta += 5;
         goto decode_success;
      }
      /* Don't barf on recent (2010) binutils padding,
         all variants of which are: nopw %cs:0x0(%eax,%eax,1)
         66 2e 0f 1f 84 00 00 00 00 00
         66 66 2e 0f 1f 84 00 00 00 00 00
         66 66 66 2e 0f 1f 84 00 00 00 00 00
         66 66 66 66 2e 0f 1f 84 00 00 00 00 00
         66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
         66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
      */
      if (code[0] == 0x66) {
         Int data16_cnt;
         for (data16_cnt = 1; data16_cnt < 6; data16_cnt++)
            if (code[data16_cnt] != 0x66)
               break;
         if (code[data16_cnt] == 0x2E && code[data16_cnt + 1] == 0x0F
             && code[data16_cnt + 2] == 0x1F && code[data16_cnt + 3] == 0x84
             && code[data16_cnt + 4] == 0x00 && code[data16_cnt + 5] == 0x00
             && code[data16_cnt + 6] == 0x00 && code[data16_cnt + 7] == 0x00
             && code[data16_cnt + 8] == 0x00 ) {
            DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n");
            delta += 9 + data16_cnt;
            goto decode_success;
         }
      }

      /* bug478624 GNU binutils uses a leal of esi into itself with
         a zero offset and CS prefix as an 8 byte no-op (Dec 2023).
         Since the CS prefix is hardly ever used we don't do much
         to decode it, just a few cases for conditional branches.
         So add handling here with other pseudo-no-ops.
      */
      if (code[0] == 0x2E && code[1] == 0x8D) {
         if (code[2] == 0x74 && code[3] == 0x26 && code[4] == 0x00) {
            DIP("leal %%cs:0(%%esi,%%eiz,1),%%esi\n");
            delta += 5;
            goto decode_success;
         }
         if (code[2] == 0xB4 && code[3] == 0x26 && code[4] == 0x00
             && code[5] == 0x00 && code[6] == 0x00 && code[7] == 0x00) {
            DIP("leal %%cs:0(%%esi,%%eiz,1),%%esi\n");
            delta += 8;
            goto decode_success;
         }
      }

      // Intel CET requires the following opcodes to be treated as NOPs
      // with any prefix and ModRM, SIB and disp combination:
      // "0F 19", "0F 1C", "0F 1D", "0F 1E", "0F 1F"
      UInt opcode_index = 0;
      // Skip any prefix combination
      UInt addr_override = 0;
      UInt temp_sz = 4;
      Bool is_prefix = True;
      while (is_prefix) {
         switch (code[opcode_index]) {
            case 0x66:
               temp_sz = 2;
               opcode_index++;
               break;
            case 0x67:
               addr_override = 1;
               opcode_index++;
               break;
            case 0x26: case 0x3E: // if we set segment override here,
            case 0x64: case 0x65: // disAMode segfaults
            case 0x2E: case 0x36:
            case 0xF0: case 0xF2: case 0xF3:
               opcode_index++;
               break;
            default:
               is_prefix = False;
         }
      }
      // Check the opcode
      if (code[opcode_index] == 0x0F) {
         switch (code[opcode_index+1]) {
            case 0x19:
            case 0x1C: case 0x1D:
            case 0x1E: case 0x1F:
               delta += opcode_index+2;
               modrm = getUChar(delta);
               if (epartIsReg(modrm)) {
                  delta += 1;
                  DIP("nop%c\n", nameISize(temp_sz));
               }
               else {
                  addr = disAMode(&alen, 0/*"no sorb"*/, delta, dis_buf);
                  delta += alen - addr_override;
                  DIP("nop%c %s\n", nameISize(temp_sz), dis_buf);
               }
               goto decode_success;
            default:
               break;
         }
      }
   }
   /* Normal instruction handling starts here. */

   /* Deal with some but not all prefixes:
         66(oso)
         F0(lock)
         2E(cs:) 3E(ds:) 26(es:) 64(fs:) 65(gs:) 36(ss:)
      Not dealt with (left in place):
         F2 F3
   */
   n_prefixes = 0;
   while (True) {
      if (n_prefixes > 7) goto decode_failure;
      pre = getUChar(delta);
      switch (pre) {
         case 0x66:
            sz = 2;
            break;
         case 0xF0:
            pfx_lock = True;
            *expect_CAS = True;
            break;
         case 0x3E: /* %DS: */
         case 0x26: /* %ES: */
         case 0x64: /* %FS: */
         case 0x65: /* %GS: */
         case 0x36: /* %SS: */
            if (sorb != 0)
               goto decode_failure; /* only one seg override allowed */
            sorb = pre;
            break;
         case 0x2E: { /* %CS: */
            /* 2E prefix on a conditional branch instruction is a
               branch-prediction hint, which can safely be ignored.  */
            UChar op1 = getIByte(delta+1);
            UChar op2 = getIByte(delta+2);
            if ((op1 >= 0x70 && op1 <= 0x7F)
                || (op1 == 0xE3)
                || (op1 == 0x0F && op2 >= 0x80 && op2 <= 0x8F)) {
               if (0) vex_printf("vex x86->IR: ignoring branch hint\n");
            } else {
               /* All other CS override cases are not handled */
               goto decode_failure;
            }
            break;
         }
         default:
            goto not_a_prefix;
      }
      n_prefixes++;
      delta++;
   }

   not_a_prefix:

   /* Now we should be looking at the primary opcode byte or the
      leading F2 or F3.  Check that any LOCK prefix is actually
      allowed. */

   if (pfx_lock) {
      if (can_be_used_with_LOCK_prefix( &guest_code[delta] )) {
         DIP("lock ");
      } else {
         *expect_CAS = False;
         goto decode_failure;
      }
   }
   /* ---------------------------------------------------- */
   /* --- The SSE decoder.                             --- */
   /* ---------------------------------------------------- */

   /* What did I do to deserve SSE ?  Perhaps I was really bad in a
      previous life? */

   /* Note, this doesn't handle SSE2 or SSE3.  That is handled in a
      later section, further on. */

   insn = &guest_code[delta];

   /* Treat fxsave specially.  It should be doable even on an SSE0
      (Pentium-II class) CPU.  Hence be prepared to handle it on
      any subarchitecture variant.
   */

   /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
       && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 0) {
      IRDirty* d;
      modrm = getIByte(delta+2);
      vassert(!epartIsReg(modrm));

      addr = disAMode ( &alen, sorb, delta+2, dis_buf );
      delta += 2+alen;
      gen_SEGV_if_not_16_aligned(addr);

      DIP("fxsave %s\n", dis_buf);

      /* Uses dirty helper:
            void x86g_do_FXSAVE ( VexGuestX86State*, UInt ) */
      d = unsafeIRDirty_0_N (
             0/*regparms*/,
             "x86g_dirtyhelper_FXSAVE",
             &x86g_dirtyhelper_FXSAVE,
             mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
          );

      /* declare we're writing memory */
      d->mFx   = Ifx_Write;
      d->mAddr = mkexpr(addr);
      d->mSize = 464; /* according to recent Intel docs */

      /* declare we're reading guest state */
      d->nFxState = 7;
      vex_bzero(&d->fxState, sizeof(d->fxState));

      d->fxState[0].fx     = Ifx_Read;
      d->fxState[0].offset = OFFB_FTOP;
      d->fxState[0].size   = sizeof(UInt);

      d->fxState[1].fx     = Ifx_Read;
      d->fxState[1].offset = OFFB_FPREGS;
      d->fxState[1].size   = 8 * sizeof(ULong);

      d->fxState[2].fx     = Ifx_Read;
      d->fxState[2].offset = OFFB_FPTAGS;
      d->fxState[2].size   = 8 * sizeof(UChar);

      d->fxState[3].fx     = Ifx_Read;
      d->fxState[3].offset = OFFB_FPROUND;
      d->fxState[3].size   = sizeof(UInt);

      d->fxState[4].fx     = Ifx_Read;
      d->fxState[4].offset = OFFB_FC3210;
      d->fxState[4].size   = sizeof(UInt);

      d->fxState[5].fx     = Ifx_Read;
      d->fxState[5].offset = OFFB_XMM0;
      d->fxState[5].size   = 8 * sizeof(U128);

      d->fxState[6].fx     = Ifx_Read;
      d->fxState[6].offset = OFFB_SSEROUND;
      d->fxState[6].size   = sizeof(UInt);

      /* Be paranoid ... this assertion tries to ensure the 8 %xmm
         images are packed back-to-back.  If not, the value of
         d->fxState[5].size is wrong. */
      vassert(16 == sizeof(U128));
      vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16));

      stmt( IRStmt_Dirty(d) );

      goto decode_success;
   }
   /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
       && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 1) {
      IRDirty* d;
      modrm = getIByte(delta+2);
      vassert(!epartIsReg(modrm));

      addr = disAMode ( &alen, sorb, delta+2, dis_buf );
      delta += 2+alen;
      gen_SEGV_if_not_16_aligned(addr);

      DIP("fxrstor %s\n", dis_buf);

      /* Uses dirty helper:
            VexEmNote x86g_do_FXRSTOR ( VexGuestX86State*, UInt )
         NOTE:
            the VexEmNote value is simply ignored (unlike for FRSTOR)
      */
      d = unsafeIRDirty_0_N (
             0/*regparms*/,
             "x86g_dirtyhelper_FXRSTOR",
             &x86g_dirtyhelper_FXRSTOR,
             mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
          );

      /* declare we're reading memory */
      d->mFx   = Ifx_Read;
      d->mAddr = mkexpr(addr);
      d->mSize = 464; /* according to recent Intel docs */

      /* declare we're writing guest state */
      d->nFxState = 7;
      vex_bzero(&d->fxState, sizeof(d->fxState));

      d->fxState[0].fx     = Ifx_Write;
      d->fxState[0].offset = OFFB_FTOP;
      d->fxState[0].size   = sizeof(UInt);

      d->fxState[1].fx     = Ifx_Write;
      d->fxState[1].offset = OFFB_FPREGS;
      d->fxState[1].size   = 8 * sizeof(ULong);

      d->fxState[2].fx     = Ifx_Write;
      d->fxState[2].offset = OFFB_FPTAGS;
      d->fxState[2].size   = 8 * sizeof(UChar);

      d->fxState[3].fx     = Ifx_Write;
      d->fxState[3].offset = OFFB_FPROUND;
      d->fxState[3].size   = sizeof(UInt);

      d->fxState[4].fx     = Ifx_Write;
      d->fxState[4].offset = OFFB_FC3210;
      d->fxState[4].size   = sizeof(UInt);

      d->fxState[5].fx     = Ifx_Write;
      d->fxState[5].offset = OFFB_XMM0;
      d->fxState[5].size   = 8 * sizeof(U128);

      d->fxState[6].fx     = Ifx_Write;
      d->fxState[6].offset = OFFB_SSEROUND;
      d->fxState[6].size   = sizeof(UInt);

      /* Be paranoid ... this assertion tries to ensure the 8 %xmm
         images are packed back-to-back.  If not, the value of
         d->fxState[5].size is wrong. */
      vassert(16 == sizeof(U128));
      vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16));

      stmt( IRStmt_Dirty(d) );

      goto decode_success;
   }
   /* ------ SSE decoder main ------ */

   /* Skip parts of the decoder which don't apply given the stated
      guest subarchitecture. */
   if (archinfo->hwcaps == 0/*baseline, no sse at all*/)
      goto after_sse_decoders;

   /* With mmxext only some extended MMX instructions are recognized.
      The mmxext instructions are MASKMOVQ MOVNTQ PAVGB PAVGW PMAXSW
      PMAXUB PMINSW PMINUB PMULHUW PSADBW PSHUFW PEXTRW PINSRW PMOVMSKB
      PREFETCHNTA PREFETCHT0 PREFETCHT1 PREFETCHT2 SFENCE

      http://support.amd.com/us/Embedded_TechDocs/22466.pdf
      https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions */

   if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/)
      goto mmxext;

   /* Otherwise we must be doing sse1 or sse2, so we can at least try
      for SSE1 here. */

   /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x58) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "addps", Iop_Add32Fx4 );
      goto decode_success;
   }

   /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x58) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "addss", Iop_Add32F0x4 );
      goto decode_success;
   }

   /* 0F 55 = ANDNPS -- G = (not G) and E */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x55) {
      delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnps", Iop_AndV128 );
      goto decode_success;
   }

   /* 0F 54 = ANDPS -- G = G and E */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x54) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "andps", Iop_AndV128 );
      goto decode_success;
   }

   /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC2) {
      delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmpps", True, 4 );
      goto decode_success;
   }

   /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xC2) {
      vassert(sz == 4);
      delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpss", False, 4 );
      goto decode_success;
   }
   /* 0F 2F = COMISS  -- 32F0x4 comparison G,E, and set ZCP */
   /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
   if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
      IRTemp argL = newTemp(Ity_F32);
      IRTemp argR = newTemp(Ity_F32);
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         assign( argR, getXMMRegLane32F( eregOfRM(modrm), 0/*lowest lane*/ ) );
         delta += 2+1;
         DIP("[u]comiss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)) );
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
         delta += 2+alen;
         DIP("[u]comiss %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRM(modrm)) );
      }
      assign( argL, getXMMRegLane32F( gregOfRM(modrm), 0/*lowest lane*/ ) );

      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1,
                        binop( Iop_And32,
                               binop(Iop_CmpF64,
                                     unop(Iop_F32toF64,mkexpr(argL)),
                                     unop(Iop_F32toF64,mkexpr(argR))),
                               mkU32(0x45)
          )));
      /* Set NDEP even though it isn't used.  This makes redundant-PUT
         elimination of previous stores to this field work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
      goto decode_success;
   }
   /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
      half xmm */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x2A) {
      IRTemp arg64 = newTemp(Ity_I64);
      IRTemp rmode = newTemp(Ity_I32);

      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         /* Only switch to MMX mode if the source is a MMX register.
            See comments on CVTPI2PD for details.  Fixes #357059. */
         do_MMX_preamble();
         assign( arg64, getMMXReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 2+alen;
         DIP("cvtpi2ps %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      assign( rmode, get_sse_roundingmode() );

      putXMMRegLane32F(
         gregOfRM(modrm), 0,
         binop(Iop_F64toF32,
               mkexpr(rmode),
               unop(Iop_I32StoF64,
                    unop(Iop_64to32, mkexpr(arg64)) )) );

      putXMMRegLane32F(
         gregOfRM(modrm), 1,
         binop(Iop_F64toF32,
               mkexpr(rmode),
               unop(Iop_I32StoF64,
                    unop(Iop_64HIto32, mkexpr(arg64)) )) );

      goto decode_success;
   }
   /* F3 0F 2A = CVTSI2SS -- convert I32 in mem/ireg to F32 in low
      quarter xmm */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x2A) {
      IRTemp arg32 = newTemp(Ity_I32);
      IRTemp rmode = newTemp(Ity_I32);
      vassert(sz == 4);

      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         assign( arg32, getIReg(4, eregOfRM(modrm)) );
         delta += 3+1;
         DIP("cvtsi2ss %s,%s\n", nameIReg(4, eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
         delta += 3+alen;
         DIP("cvtsi2ss %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      assign( rmode, get_sse_roundingmode() );

      putXMMRegLane32F(
         gregOfRM(modrm), 0,
         binop(Iop_F64toF32,
               mkexpr(rmode),
               unop(Iop_I32StoF64, mkexpr(arg32)) ) );

      goto decode_success;
   }
   /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
      I32 in mmx, according to prevailing SSE rounding mode */
   /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
      I32 in mmx, rounding towards zero */
   if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
      IRTemp dst64  = newTemp(Ity_I64);
      IRTemp rmode  = newTemp(Ity_I32);
      IRTemp f32lo  = newTemp(Ity_F32);
      IRTemp f32hi  = newTemp(Ity_F32);
      Bool   r2zero = toBool(insn[1] == 0x2C);

      do_MMX_preamble();
      modrm = getIByte(delta+2);

      if (epartIsReg(modrm)) {
         delta += 2+1;
         assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
         assign(f32hi, getXMMRegLane32F(eregOfRM(modrm), 1));
         DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
                                   nameXMMReg(eregOfRM(modrm)),
                                   nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
         assign(f32hi, loadLE(Ity_F32, binop( Iop_Add32,
                                              mkexpr(addr),
                                              mkU32(4) )));
         delta += 2+alen;
         DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
                                   dis_buf,
                                   nameMMXReg(gregOfRM(modrm)));
      }

      if (r2zero) {
         assign(rmode, mkU32((UInt)Irrm_ZERO) );
      } else {
         assign( rmode, get_sse_roundingmode() );
      }

      assign(
         dst64,
         binop( Iop_32HLto64,
                binop( Iop_F64toI32S,
                       mkexpr(rmode),
                       unop( Iop_F32toF64, mkexpr(f32hi) ) ),
                binop( Iop_F64toI32S,
                       mkexpr(rmode),
                       unop( Iop_F32toF64, mkexpr(f32lo) ) )
              )
      );

      putMMXReg(gregOfRM(modrm), mkexpr(dst64));
      goto decode_success;
   }
   /* F3 0F 2D = CVTSS2SI -- convert F32 in mem/low quarter xmm to
      I32 in ireg, according to prevailing SSE rounding mode */
   /* F3 0F 2C = CVTTSS2SI -- convert F32 in mem/low quarter xmm to
      I32 in ireg, rounding towards zero */
   if (insn[0] == 0xF3 && insn[1] == 0x0F
       && (insn[2] == 0x2D || insn[2] == 0x2C)) {
      IRTemp rmode  = newTemp(Ity_I32);
      IRTemp f32lo  = newTemp(Ity_F32);
      Bool   r2zero = toBool(insn[2] == 0x2C);
      vassert(sz == 4);

      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         delta += 3+1;
         assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
         DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
                                   nameXMMReg(eregOfRM(modrm)),
                                   nameIReg(4, gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
         delta += 3+alen;
         DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
                                   dis_buf,
                                   nameIReg(4, gregOfRM(modrm)));
      }

      if (r2zero) {
         assign( rmode, mkU32((UInt)Irrm_ZERO) );
      } else {
         assign( rmode, get_sse_roundingmode() );
      }

      putIReg(4, gregOfRM(modrm),
                 binop( Iop_F64toI32S,
                        mkexpr(rmode),
                        unop( Iop_F32toF64, mkexpr(f32lo) ) )
      );

      goto decode_success;
   }
   /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5E) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "divps", Iop_Div32Fx4 );
      goto decode_success;
   }

   /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5E) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "divss", Iop_Div32F0x4 );
      goto decode_success;
   }
   /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
   if (insn[0] == 0x0F && insn[1] == 0xAE
       && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 2) {

      IRTemp t64 = newTemp(Ity_I64);
      IRTemp ew  = newTemp(Ity_I32);

      modrm = getIByte(delta+2);
      vassert(!epartIsReg(modrm));
      vassert(sz == 4);

      addr = disAMode ( &alen, sorb, delta+2, dis_buf );
      delta += 2+alen;
      DIP("ldmxcsr %s\n", dis_buf);

      /* The only thing we observe in %mxcsr is the rounding mode.
         Therefore, pass the 32-bit value (SSE native-format control
         word) to a clean helper, getting back a 64-bit value, the
         lower half of which is the SSEROUND value to store, and the
         upper half of which is the emulation-warning token which may
         be generated.
      */
      /* ULong x86h_check_ldmxcsr ( UInt ); */
      assign( t64, mkIRExprCCall(
                      Ity_I64, 0/*regparms*/,
                      "x86g_check_ldmxcsr",
                      &x86g_check_ldmxcsr,
                      mkIRExprVec_1( loadLE(Ity_I32, mkexpr(addr)) )
                   )
            );

      put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
      assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
      put_emwarn( mkexpr(ew) );
      /* Finally, if an emulation warning was reported, side-exit to
         the next insn, reporting the warning, so that Valgrind's
         dispatcher sees the warning. */
      stmt(
         IRStmt_Exit(
            binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
            Ijk_EmWarn,
            IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
            OFFB_EIP
         )
      );
      goto decode_success;
   }
   /* mmxext sse1 subset starts here. mmxext only arches will parse
      only this subset of the sse1 instructions. */
  mmxext:

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F F7 = MASKMOVQ -- 8x8 masked store */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) {
      Bool ok = False;
      delta = dis_MMX( &ok, sorb, sz, delta+1 );
      if (!ok)
         goto decode_failure;
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F E7 = MOVNTQ -- for us, just a plain MMX store.  Note, the
      Intel manual does not say anything about the usual business of
      the FP reg tags getting trashed whenever an MMX insn happens.
      So we just leave them alone.
   */
   if (insn[0] == 0x0F && insn[1] == 0xE7) {
      modrm = getIByte(delta+2);
      if (sz == 4 && !epartIsReg(modrm)) {
         /* do_MMX_preamble(); Intel docs don't specify this */
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
         DIP("movntq %s,%s\n", dis_buf,
                               nameMMXReg(gregOfRM(modrm)));
         delta += 2+alen;
         goto decode_success;
      }
      /* else fall through */
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE0) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                 sorb, delta+2, insn[1], "pavgb", False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE3) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                 sorb, delta+2, insn[1], "pavgw", False );
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
      zero-extend of it in ireg(G). */
   if (insn[0] == 0x0F && insn[1] == 0xC5) {
      modrm = insn[2];
      if (sz == 4 && epartIsReg(modrm)) {
         IRTemp sV = newTemp(Ity_I64);
         t5 = newTemp(Ity_I16);
         do_MMX_preamble();
         assign(sV, getMMXReg(eregOfRM(modrm)));
         breakup64to16s( sV, &t3, &t2, &t1, &t0 );
         switch (insn[3] & 3) {
            case 0:  assign(t5, mkexpr(t0)); break;
            case 1:  assign(t5, mkexpr(t1)); break;
            case 2:  assign(t5, mkexpr(t2)); break;
            case 3:  assign(t5, mkexpr(t3)); break;
            default: vassert(0); /*NOTREACHED*/
         }
         putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t5)));
         DIP("pextrw $%d,%s,%s\n",
             (Int)insn[3], nameMMXReg(eregOfRM(modrm)),
                           nameIReg(4,gregOfRM(modrm)));
         delta += 4;
         goto decode_success;
      }
      /* else fall through */
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
      put it into the specified lane of mmx(G). */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC4) {
      /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
         mmx reg.  t4 is the new lane value.  t5 is the original
         mmx value. t6 is the new mmx value. */
      Int lane;
      t4 = newTemp(Ity_I16);
      t5 = newTemp(Ity_I64);
      t6 = newTemp(Ity_I64);
      modrm = insn[2];
      do_MMX_preamble();

      assign(t5, getMMXReg(gregOfRM(modrm)));
      breakup64to16s( t5, &t3, &t2, &t1, &t0 );

      if (epartIsReg(modrm)) {
         assign(t4, getIReg(2, eregOfRM(modrm)));
         delta += 3+1;
         lane = insn[3+1-1];
         DIP("pinsrw $%d,%s,%s\n", lane,
                                   nameIReg(2,eregOfRM(modrm)),
                                   nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         delta += 3+alen;
         lane = insn[3+alen-1];
         assign(t4, loadLE(Ity_I16, mkexpr(addr)));
         DIP("pinsrw $%d,%s,%s\n", lane,
                                   dis_buf,
                                   nameMMXReg(gregOfRM(modrm)));
      }

      switch (lane & 3) {
         case 0:  assign(t6, mk64from16s(t3,t2,t1,t4)); break;
         case 1:  assign(t6, mk64from16s(t3,t2,t4,t0)); break;
         case 2:  assign(t6, mk64from16s(t3,t4,t1,t0)); break;
         case 3:  assign(t6, mk64from16s(t4,t2,t1,t0)); break;
         default: vassert(0); /*NOTREACHED*/
      }
      putMMXReg(gregOfRM(modrm), mkexpr(t6));
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F EE = PMAXSW -- 16x4 signed max */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEE) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                 sorb, delta+2, insn[1], "pmaxsw", False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F DE = PMAXUB -- 8x8 unsigned max */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDE) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                 sorb, delta+2, insn[1], "pmaxub", False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F EA = PMINSW -- 16x4 signed min */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEA) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                 sorb, delta+2, insn[1], "pminsw", False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F DA = PMINUB -- 8x8 unsigned min */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDA) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                 sorb, delta+2, insn[1], "pminub", False );
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
      mmx(E), turn them into a byte, and put zero-extend of it in
      ireg(G). */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD7) {
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         do_MMX_preamble();
         t0 = newTemp(Ity_I64);
         t1 = newTemp(Ity_I32);
         assign(t0, getMMXReg(eregOfRM(modrm)));
         assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
         putIReg(4, gregOfRM(modrm), mkexpr(t1));
         DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                 nameIReg(4,gregOfRM(modrm)));
         delta += 3;
         goto decode_success;
      }
      /* else fall through */
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE4) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                 sorb, delta+2, insn[1], "pmuluh", False );
      goto decode_success;
   }
   /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
   /* 0F 18 /1 = PREFETCH0   -- with various different hints */
   /* 0F 18 /2 = PREFETCH1 */
   /* 0F 18 /3 = PREFETCH2 */
   if (insn[0] == 0x0F && insn[1] == 0x18
       && !epartIsReg(insn[2])
       && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 3) {
      const HChar* hintstr = "??";

      modrm = getIByte(delta+2);
      vassert(!epartIsReg(modrm));

      addr = disAMode ( &alen, sorb, delta+2, dis_buf );
      delta += 2+alen;

      switch (gregOfRM(modrm)) {
         case 0: hintstr = "nta"; break;
         case 1: hintstr = "t0"; break;
         case 2: hintstr = "t1"; break;
         case 3: hintstr = "t2"; break;
         default: vassert(0); /*NOTREACHED*/
      }

      DIP("prefetch%s %s\n", hintstr, dis_buf);
      goto decode_success;
   }
   /* 0F 0D /0 = PREFETCH  m8 -- 3DNow! prefetch */
   /* 0F 0D /1 = PREFETCHW m8 -- ditto, with some other hint */
   if (insn[0] == 0x0F && insn[1] == 0x0D
       && !epartIsReg(insn[2])
       && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 1) {
      const HChar* hintstr = "??";

      modrm = getIByte(delta+2);
      vassert(!epartIsReg(modrm));

      addr = disAMode ( &alen, sorb, delta+2, dis_buf );
      delta += 2+alen;

      switch (gregOfRM(modrm)) {
         case 0: hintstr = ""; break;
         case 1: hintstr = "w"; break;
         default: vassert(0); /*NOTREACHED*/
      }

      DIP("prefetch%s %s\n", hintstr, dis_buf);
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF6) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                 sorb, delta+2, insn[1], "psadbw", False );
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x70) {
      Int order;
      IRTemp sV, dV, s3, s2, s1, s0;
      s3 = s2 = s1 = s0 = IRTemp_INVALID;
      sV = newTemp(Ity_I64);
      dV = newTemp(Ity_I64);
      do_MMX_preamble();
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         order = (Int)insn[3];
         delta += 2+2;
         DIP("pshufw $%d,%s,%s\n", order, 
                                   nameMMXReg(eregOfRM(modrm)),
                                   nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         order = (Int)insn[2+alen];
         delta += 3+alen;
         DIP("pshufw $%d,%s,%s\n", order, 
                                   dis_buf,
                                   nameMMXReg(gregOfRM(modrm)));
      }
      breakup64to16s( sV, &s3, &s2, &s1, &s0 );

#     define SEL(n) \
                ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
      assign(dV,
             mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
                          SEL((order>>2)&3), SEL((order>>0)&3) )
      );
      putMMXReg(gregOfRM(modrm), mkexpr(dV));
#     undef SEL
      goto decode_success;
   }
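   /* Worked example for the PSHUFW selector above: each 2-bit field
      of 'order' picks one source lane, and SEL feeds them to
      mk64from16s from the top lane downwards.  For instance
      order == 0x1B (binary 00 01 10 11) selects lanes 0,1,2,3 for
      result lanes 3,2,1,0 respectively -- i.e. it reverses the four
      16-bit lanes. */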
   /* 0F AE /7 = SFENCE -- flush pending operations to memory */
   if (insn[0] == 0x0F && insn[1] == 0xAE
       && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
      vassert(sz == 4);
      delta += 3;
      /* Insert a memory fence.  It's sometimes important that these
         are carried through to the generated code. */
      stmt( IRStmt_MBE(Imbe_Fence) );
      DIP("sfence\n");
      goto decode_success;
   }
   /* End of mmxext SSE1 subset.  No more SSE parsing for
      mmxext-only arches. */
   if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/)
      goto after_sse_decoders;
   /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 );
      goto decode_success;
   }

   /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 );
      goto decode_success;
   }

   /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 );
      goto decode_success;
   }

   /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 );
      goto decode_success;
   }
   /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
   /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
   if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         putXMMReg( gregOfRM(modrm), 
                    getXMMReg( eregOfRM(modrm) ));
         DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
         delta += 2+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         if (insn[1] == 0x28/*movaps*/)
            gen_SEGV_if_not_16_aligned( addr );
         putXMMReg( gregOfRM(modrm), 
                    loadLE(Ity_V128, mkexpr(addr)) );
         DIP("mov[ua]ps %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRM(modrm)));
         delta += 2+alen;
      }
      goto decode_success;
   }
   /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
   /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
   if (sz == 4 && insn[0] == 0x0F 
       && (insn[1] == 0x29 || insn[1] == 0x11)) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         /* fall through; awaiting test case */
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         if (insn[1] == 0x29/*movaps*/)
            gen_SEGV_if_not_16_aligned( addr );
         storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
         DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)),
                                  dis_buf );
         delta += 2+alen;
         goto decode_success;
      }
   }
   /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
   /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         delta += 2+1;
         putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
                          getXMMRegLane64( eregOfRM(modrm), 0 ) );
         DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)), 
                               nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movhps %s,%s\n", dis_buf, 
                               nameXMMReg( gregOfRM(modrm) ));
      }
      goto decode_success;
   }
   /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) {
      if (!epartIsReg(insn[2])) {
         delta += 2;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         storeLE( mkexpr(addr), 
                  getXMMRegLane64( gregOfRM(insn[2]),
                                   1/*upper lane*/ ) );
         DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
                               dis_buf);
         goto decode_success;
      }
      /* else fall through */
   }
   /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
   /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         delta += 2+1;
         putXMMRegLane64( gregOfRM(modrm),  
                          0/*lower lane*/,
                          getXMMRegLane64( eregOfRM(modrm), 1 ));
         DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)), 
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         putXMMRegLane64( gregOfRM(modrm),  0/*lower lane*/,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movlps %s, %s\n", 
             dis_buf, nameXMMReg( gregOfRM(modrm) ));
      }
      goto decode_success;
   }
   /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) {
      if (!epartIsReg(insn[2])) {
         delta += 2;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         storeLE( mkexpr(addr), 
                  getXMMRegLane64( gregOfRM(insn[2]),
                                   0/*lower lane*/ ) );
         DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
                                dis_buf);
         goto decode_success;
      }
      /* else fall through */
   }
   /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
      to 4 lowest bits of ireg(G) */
   if (insn[0] == 0x0F && insn[1] == 0x50) {
      modrm = getIByte(delta+2);
      if (sz == 4 && epartIsReg(modrm)) {
         Int src;
         t0 = newTemp(Ity_I32);
         t1 = newTemp(Ity_I32);
         t2 = newTemp(Ity_I32);
         t3 = newTemp(Ity_I32);
         delta += 2+1;
         src = eregOfRM(modrm);
         assign( t0, binop( Iop_And32,
                            binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
                            mkU32(1) ));
         assign( t1, binop( Iop_And32,
                            binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
                            mkU32(2) ));
         assign( t2, binop( Iop_And32,
                            binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
                            mkU32(4) ));
         assign( t3, binop( Iop_And32,
                            binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
                            mkU32(8) ));
         putIReg(4, gregOfRM(modrm),
                    binop(Iop_Or32,
                          binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
                          binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
                         )
                 );
         DIP("movmskps %s,%s\n", nameXMMReg(src), 
                                 nameIReg(4, gregOfRM(modrm)));
         goto decode_success;
      }
      /* else fall through */
   }
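   /* Worked example for the MOVMSKPS IR above: lane N's sign bit
      (bit 31) is shifted down to bit position N and masked, so the
      Or32 of t0..t3 is a 4-bit mask in bits 3:0 of ireg(G).  If only
      lanes 3 and 0 are negative, t3 == 8 and t0 == 1, and ireg(G)
      receives 0x00000009. */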
   /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
   /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
   if (insn[0] == 0x0F && insn[1] == 0x2B) {
      modrm = getIByte(delta+2);
      if (!epartIsReg(modrm)) {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
         DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
                                 dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 2+alen;
         goto decode_success;
      }
      /* else fall through */
   }
   /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
      (lo 1/4 xmm).  If E is mem, upper 3/4 of G is zeroed out. */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         putXMMRegLane32( gregOfRM(modrm), 0,
                          getXMMRegLane32( eregOfRM(modrm), 0 ));
         DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                              nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         /* zero bits 127:64 */
         putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
         /* zero bits 63:32 */
         putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) );
         /* write bits 31:0 */
         putXMMRegLane32( gregOfRM(modrm), 0,
                          loadLE(Ity_I32, mkexpr(addr)) );
         DIP("movss %s,%s\n", dis_buf,
                              nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }
      goto decode_success;
   }
   /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
      or lo 1/4 xmm). */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         /* fall through, we don't yet have a test case */
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         storeLE( mkexpr(addr),
                  getXMMRegLane32(gregOfRM(modrm), 0) );
         DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)),
                              dis_buf);
         delta += 3+alen;
         goto decode_success;
      }
   }
   /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 );
      goto decode_success;
   }

   /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 );
      goto decode_success;
   }

   /* 0F 56 = ORPS -- G = G or E */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 );
      goto decode_success;
   }

   /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
   if (insn[0] == 0x0F && insn[1] == 0x53) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, 
                                        "rcpps", Iop_RecipEst32Fx4 );
      goto decode_success;
   }

   /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x53) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3, 
                                         "rcpss", Iop_RecipEst32F0x4 );
      goto decode_success;
   }

   /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
   if (insn[0] == 0x0F && insn[1] == 0x52) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, 
                                        "rsqrtps", Iop_RSqrtEst32Fx4 );
      goto decode_success;
   }

   /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x52) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3, 
                                         "rsqrtss", Iop_RSqrtEst32F0x4 );
      goto decode_success;
   }
   /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC6) {
      Int    select;
      IRTemp sV, dV;
      IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
      sV = newTemp(Ity_V128);
      dV = newTemp(Ity_V128);
      s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
      modrm = insn[2];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         select = (Int)insn[3];
         delta += 2+2;
         DIP("shufps $%d,%s,%s\n", select, 
                                   nameXMMReg(eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         select = (Int)insn[2+alen];
         delta += 3+alen;
         DIP("shufps $%d,%s,%s\n", select, 
                                   dis_buf,
                                   nameXMMReg(gregOfRM(modrm)));
      }

      breakup128to32s( dV, &d3, &d2, &d1, &d0 );
      breakup128to32s( sV, &s3, &s2, &s1, &s0 );

#     define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
#     define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))

      putXMMReg(
         gregOfRM(modrm), 
         mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3), 
                       SELD((select>>2)&3), SELD((select>>0)&3) )
      );

#     undef SELD
#     undef SELS

      goto decode_success;
   }
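   /* Worked example for the SHUFPS selector above: result lanes 1:0
      always come from dV (SELD) and lanes 3:2 from sV (SELS), each
      chosen by a 2-bit field of 'select'.  For instance select ==
      0x1B (binary 00 01 10 11) produces, from lane 3 down to lane 0,
      (s0, s1, d2, d3). */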
   /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x51) {
      delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, 
                                        "sqrtps", Iop_Sqrt32Fx4 );
      goto decode_success;
   }

   /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x51) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3, 
                                         "sqrtss", Iop_Sqrt32F0x4 );
      goto decode_success;
   }
   /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
   if (insn[0] == 0x0F && insn[1] == 0xAE
       && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 3) {
      modrm = getIByte(delta+2);
      vassert(sz == 4);
      vassert(!epartIsReg(modrm));

      addr = disAMode ( &alen, sorb, delta+2, dis_buf );
      delta += 2+alen;

      /* Fake up a native SSE mxcsr word.  The only thing it depends
         on is SSEROUND[1:0], so call a clean helper to cook it up. 
      */
      /* UInt x86h_create_mxcsr ( UInt sseround ) */
      DIP("stmxcsr %s\n", dis_buf);
      storeLE( mkexpr(addr), 
               mkIRExprCCall(
                  Ity_I32, 0/*regp*/,
                  "x86g_create_mxcsr", &x86g_create_mxcsr, 
                  mkIRExprVec_1( get_sse_roundingmode() ) 
               ) 
             );
      goto decode_success;
   }
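   /* Sketch of what the clean helper does, assuming its definition
      in guest_x86_helpers.c: x86g_create_mxcsr is believed to
      compute roughly 0x1F80 | (sseround << 13), i.e. a default
      all-exceptions-masked MXCSR with the guest's SSEROUND value
      placed in the RC field (bits 14:13).  So round-to-nearest (0)
      fakes up 0x1F80 and round-to-zero (3) fakes up 0x7F80. */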
   /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5C) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "subps", Iop_Sub32Fx4 );
      goto decode_success;
   }

   /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5C) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "subss", Iop_Sub32F0x4 );
      goto decode_success;
   }
   /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
   /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
   /* These just appear to be special cases of SHUFPS */
   if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
      IRTemp sV, dV;
      IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
      Bool hi = toBool(insn[1] == 0x15);
      sV = newTemp(Ity_V128);
      dV = newTemp(Ity_V128);
      s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
      modrm = insn[2];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
                                  nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
                                  dis_buf,
                                  nameXMMReg(gregOfRM(modrm)));
      }

      breakup128to32s( dV, &d3, &d2, &d1, &d0 );
      breakup128to32s( sV, &s3, &s2, &s1, &s0 );

      if (hi) {
         putXMMReg( gregOfRM(modrm), mk128from32s( s3, d3, s2, d2 ) );
      } else {
         putXMMReg( gregOfRM(modrm), mk128from32s( s1, d1, s0, d0 ) );
      }

      goto decode_success;
   }
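   /* Worked example for the interleaves above: with dV lanes
      (d3,d2,d1,d0) and sV lanes (s3,s2,s1,s0), UNPCKHPS produces
      (s3,d3,s2,d2) and UNPCKLPS produces (s1,d1,s0,d0), reading from
      lane 3 down to lane 0 of the result. */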
   /* 0F 57 = XORPS -- G = G xor E */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x57) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorps", Iop_XorV128 );
      goto decode_success;
   }
   /* ---------------------------------------------------- */
   /* --- end of the SSE decoder.                      --- */
   /* ---------------------------------------------------- */

   /* ---------------------------------------------------- */
   /* --- start of the SSE2 decoder.                   --- */
   /* ---------------------------------------------------- */

   /* Skip parts of the decoder which don't apply given the stated
      guest subarchitecture. */
   if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2))
      goto after_sse_decoders; /* no SSE2 capabilities */

   insn = &guest_code[delta];
   /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x58) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "addpd", Iop_Add64Fx2 );
      goto decode_success;
   }

   /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x58) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "addsd", Iop_Add64F0x2 );
      goto decode_success;
   }

   /* 66 0F 55 = ANDNPD -- G = (not G) and E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x55) {
      delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnpd", Iop_AndV128 );
      goto decode_success;
   }

   /* 66 0F 54 = ANDPD -- G = G and E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x54) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "andpd", Iop_AndV128 );
      goto decode_success;
   }

   /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC2) {
      delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmppd", True, 8 );
      goto decode_success;
   }

   /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xC2) {
      vassert(sz == 4);
      delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpsd", False, 8 );
      goto decode_success;
   }
   /* 66 0F 2F = COMISD  -- 64F0x2 comparison G,E, and set ZCP */
   /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
   if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
      IRTemp argL = newTemp(Ity_F64);
      IRTemp argR = newTemp(Ity_F64);
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         assign( argR, getXMMRegLane64F( eregOfRM(modrm), 0/*lowest lane*/ ) );
         delta += 2+1;
         DIP("[u]comisd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)) );
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
         delta += 2+alen;
         DIP("[u]comisd %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRM(modrm)) );
      }
      assign( argL, getXMMRegLane64F( gregOfRM(modrm), 0/*lowest lane*/ ) );

      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( 
               OFFB_CC_DEP1,
               binop( Iop_And32,
                      binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)),
                      mkU32(0x45)
          )));
      /* Set NDEP even though it isn't used.  This makes redundant-PUT
         elimination of previous stores to this field work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
      goto decode_success;
   }
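   /* Note on the 0x45 mask above: Iop_CmpF64 produces an
      IRCmpF64Result (Ircr_UN = 0x45, Ircr_LT = 0x01, Ircr_GT = 0x00,
      Ircr_EQ = 0x40), an encoding chosen to line up with the x86
      flag bits ZF (0x40), PF (0x04) and CF (0x01).  Masking with
      0x45 therefore yields exactly the Intel-specified [U]COMISD
      flag settings: EQ -> ZF, LT -> CF, GT -> nothing set, and
      unordered -> all three set. */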
   /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
      F64 in xmm(G) */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xE6) {
      IRTemp arg64 = newTemp(Ity_I64);
      vassert(sz == 4);

      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         assign( arg64, getXMMRegLane64(eregOfRM(modrm), 0) );
         delta += 3+1;
         DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("cvtdq2pd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      putXMMRegLane64F( 
         gregOfRM(modrm), 0,
         unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
      );

      putXMMRegLane64F(
         gregOfRM(modrm), 1, 
         unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
      );

      goto decode_success;
   }
   /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
      xmm(G) */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5B) {
      IRTemp argV  = newTemp(Ity_V128);
      IRTemp rmode = newTemp(Ity_I32);

      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         assign( argV, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("cvtdq2ps %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      assign( rmode, get_sse_roundingmode() );
      breakup128to32s( argV, &t3, &t2, &t1, &t0 );

#     define CVT(_t)  binop( Iop_F64toF32,                    \
                             mkexpr(rmode),                   \
                             unop(Iop_I32StoF64,mkexpr(_t)))

      putXMMRegLane32F( gregOfRM(modrm), 3, CVT(t3) );
      putXMMRegLane32F( gregOfRM(modrm), 2, CVT(t2) );
      putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
      putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );

#     undef CVT

      goto decode_success;
   }
   /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
      lo half xmm(G), and zero upper half */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xE6) {
      IRTemp argV  = newTemp(Ity_V128);
      IRTemp rmode = newTemp(Ity_I32);
      vassert(sz == 4);

      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         assign( argV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("cvtpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("cvtpd2dq %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      assign( rmode, get_sse_roundingmode() );
      t0 = newTemp(Ity_F64);
      t1 = newTemp(Ity_F64);
      assign( t0, unop(Iop_ReinterpI64asF64, 
                       unop(Iop_V128to64, mkexpr(argV))) );
      assign( t1, unop(Iop_ReinterpI64asF64, 
                       unop(Iop_V128HIto64, mkexpr(argV))) );

#     define CVT(_t)  binop( Iop_F64toI32S,                   \
                             mkexpr(rmode),                   \
                             mkexpr(_t) )

      putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
      putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
      putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
      putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );

#     undef CVT

      goto decode_success;
   }
   /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
      I32 in mmx, according to prevailing SSE rounding mode */
   /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
      I32 in mmx, rounding towards zero */
   if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
      IRTemp dst64  = newTemp(Ity_I64);
      IRTemp rmode  = newTemp(Ity_I32);
      IRTemp f64lo  = newTemp(Ity_F64);
      IRTemp f64hi  = newTemp(Ity_F64);
      Bool   r2zero = toBool(insn[1] == 0x2C);

      do_MMX_preamble();
      modrm = getIByte(delta+2);

      if (epartIsReg(modrm)) {
         delta += 2+1;
         assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
         assign(f64hi, getXMMRegLane64F(eregOfRM(modrm), 1));
         DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
                                   nameXMMReg(eregOfRM(modrm)),
                                   nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
         assign(f64hi, loadLE(Ity_F64, binop( Iop_Add32, 
                                              mkexpr(addr), 
                                              mkU32(8) )));
         delta += 2+alen;
         DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
                                   dis_buf,
                                   nameMMXReg(gregOfRM(modrm)));
      }

      if (r2zero) {
         assign(rmode, mkU32((UInt)Irrm_ZERO) );
      } else {
         assign( rmode, get_sse_roundingmode() );
      }

      assign( 
         dst64,
         binop( Iop_32HLto64,
                binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
                binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
              )
      );

      putMMXReg(gregOfRM(modrm), mkexpr(dst64));
      goto decode_success;
   }
   /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
      lo half xmm(G), and zero upper half */
   /* Note, this is practically identical to CVTPD2DQ.  It would have 
      been nicer to merge them together, but the insn[] offsets differ
      by one. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5A) {
      IRTemp argV  = newTemp(Ity_V128);
      IRTemp rmode = newTemp(Ity_I32);

      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         assign( argV, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("cvtpd2ps %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      assign( rmode, get_sse_roundingmode() );
      t0 = newTemp(Ity_F64);
      t1 = newTemp(Ity_F64);
      assign( t0, unop(Iop_ReinterpI64asF64, 
                       unop(Iop_V128to64, mkexpr(argV))) );
      assign( t1, unop(Iop_ReinterpI64asF64, 
                       unop(Iop_V128HIto64, mkexpr(argV))) );

#     define CVT(_t)  binop( Iop_F64toF32,                    \
                             mkexpr(rmode),                   \
                             mkexpr(_t) )

      putXMMRegLane32(  gregOfRM(modrm), 3, mkU32(0) );
      putXMMRegLane32(  gregOfRM(modrm), 2, mkU32(0) );
      putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
      putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );

#     undef CVT

      goto decode_success;
   }
   /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
      xmm(G) */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x2A) {
      IRTemp arg64 = newTemp(Ity_I64);

      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         /* Only switch to MMX mode if the source is a MMX register.
            This is inconsistent with all other instructions which
            convert between XMM and (M64 or MMX), which always switch
            to MMX mode even if 64-bit operand is M64 and not MMX.  At
            least, that's what the Intel docs seem to me to say. */
         do_MMX_preamble();
         assign( arg64, getMMXReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 2+alen;
         DIP("cvtpi2pd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      putXMMRegLane64F( 
         gregOfRM(modrm), 0,
         unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
      );

      putXMMRegLane64F( 
         gregOfRM(modrm), 1, 
         unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
      );

      goto decode_success;
   }
   /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
      xmm(G) */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5B) {
      IRTemp argV  = newTemp(Ity_V128);
      IRTemp rmode = newTemp(Ity_I32);

      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         assign( argV, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("cvtps2dq %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      assign( rmode, get_sse_roundingmode() );
      breakup128to32s( argV, &t3, &t2, &t1, &t0 );

      /* This is less than ideal.  If it turns out to be a performance
         bottleneck it can be improved. */
#     define CVT(_t)                            \
        binop( Iop_F64toI32S,                   \
               mkexpr(rmode),                   \
               unop( Iop_F32toF64,              \
                     unop( Iop_ReinterpI32asF32, mkexpr(_t))) )

      putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
      putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
      putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
      putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );

#     undef CVT

      goto decode_success;
   }
   /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
      F64 in xmm(G). */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5A) {
      IRTemp f32lo = newTemp(Ity_F32);
      IRTemp f32hi = newTemp(Ity_F32);

      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         assign( f32lo, getXMMRegLane32F(eregOfRM(modrm), 0) );
         assign( f32hi, getXMMRegLane32F(eregOfRM(modrm), 1) );
         delta += 2+1;
         DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
         assign( f32hi, loadLE(Ity_F32, 
                               binop(Iop_Add32,mkexpr(addr),mkU32(4))) );
         delta += 2+alen;
         DIP("cvtps2pd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      putXMMRegLane64F( gregOfRM(modrm), 1,
                        unop(Iop_F32toF64, mkexpr(f32hi)) );
      putXMMRegLane64F( gregOfRM(modrm), 0,
                        unop(Iop_F32toF64, mkexpr(f32lo)) );

      goto decode_success;
   }
   /* F2 0F 2D = CVTSD2SI -- convert F64 in mem/low half xmm to
      I32 in ireg, according to prevailing SSE rounding mode */
   /* F2 0F 2C = CVTTSD2SI -- convert F64 in mem/low half xmm to
      I32 in ireg, rounding towards zero */
   if (insn[0] == 0xF2 && insn[1] == 0x0F 
       && (insn[2] == 0x2D || insn[2] == 0x2C)) {
      IRTemp rmode  = newTemp(Ity_I32);
      IRTemp f64lo  = newTemp(Ity_F64);
      Bool   r2zero = toBool(insn[2] == 0x2C);

      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         delta += 3+1;
         assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
         DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
                                   nameXMMReg(eregOfRM(modrm)),
                                   nameIReg(4, gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
         delta += 3+alen;
         DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
                                   dis_buf,
                                   nameIReg(4, gregOfRM(modrm)));
      }

      if (r2zero) {
         assign( rmode, mkU32((UInt)Irrm_ZERO) );
      } else {
         assign( rmode, get_sse_roundingmode() );
      }

      putIReg(4, gregOfRM(modrm),
                 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );

      goto decode_success;
   }
   /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
      low 1/4 xmm(G), according to prevailing SSE rounding mode */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5A) {
      IRTemp rmode = newTemp(Ity_I32);
      IRTemp f64lo = newTemp(Ity_F64);
      vassert(sz == 4);

      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         delta += 3+1;
         assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
         DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
         delta += 3+alen;
         DIP("cvtsd2ss %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
      }

      assign( rmode, get_sse_roundingmode() );
      putXMMRegLane32F( 
         gregOfRM(modrm), 0, 
         binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
      );

      goto decode_success;
   }
   /* F2 0F 2A = CVTSI2SD -- convert I32 in mem/ireg to F64 in low
      half xmm(G) */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x2A) {
      IRTemp arg32 = newTemp(Ity_I32);
      vassert(sz == 4);

      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         assign( arg32, getIReg(4, eregOfRM(modrm)) );
         delta += 3+1;
         DIP("cvtsi2sd %s,%s\n", nameIReg(4, eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
         delta += 3+alen;
         DIP("cvtsi2sd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)) );
      }

      putXMMRegLane64F( 
         gregOfRM(modrm), 0,
         unop(Iop_I32StoF64, mkexpr(arg32)) );

      goto decode_success;
   }
   /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
      low half xmm(G) */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5A) {
      IRTemp f32lo = newTemp(Ity_F32);
      vassert(sz == 4);

      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         delta += 3+1;
         assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
         DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
         delta += 3+alen;
         DIP("cvtss2sd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
      }

      putXMMRegLane64F( gregOfRM(modrm), 0, 
                        unop( Iop_F32toF64, mkexpr(f32lo) ) );

      goto decode_success;
   }
   /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
      lo half xmm(G), and zero upper half, rounding towards zero */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE6) {
      IRTemp argV  = newTemp(Ity_V128);
      IRTemp rmode = newTemp(Ity_I32);

      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         assign( argV, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("cvttpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("cvttpd2dq %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRM(modrm)) );
      }

      assign( rmode, mkU32((UInt)Irrm_ZERO) );

      t0 = newTemp(Ity_F64);
      t1 = newTemp(Ity_F64);
      assign( t0, unop(Iop_ReinterpI64asF64, 
                       unop(Iop_V128to64, mkexpr(argV))) );
      assign( t1, unop(Iop_ReinterpI64asF64, 
                       unop(Iop_V128HIto64, mkexpr(argV))) );

#     define CVT(_t)  binop( Iop_F64toI32S,                   \
                             mkexpr(rmode),                   \
                             mkexpr(_t) )

      putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
      putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
      putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
      putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );

#     undef CVT

      goto decode_success;
   }
   /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
      xmm(G), rounding towards zero */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5B) {
      IRTemp argV  = newTemp(Ity_V128);
      IRTemp rmode = newTemp(Ity_I32);
      vassert(sz == 4);

      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         assign( argV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("cvttps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("cvttps2dq %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRM(modrm)) );
      }

      assign( rmode, mkU32((UInt)Irrm_ZERO) );
      breakup128to32s( argV, &t3, &t2, &t1, &t0 );

      /* This is less than ideal.  If it turns out to be a performance
         bottleneck it can be improved. */
#     define CVT(_t)                            \
        binop( Iop_F64toI32S,                   \
               mkexpr(rmode),                   \
               unop( Iop_F32toF64,              \
                     unop( Iop_ReinterpI32asF32, mkexpr(_t))) )

      putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
      putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
      putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
      putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );

#     undef CVT

      goto decode_success;
   }
   /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5E) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "divpd", Iop_Div64Fx2 );
      goto decode_success;
   }

   /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5E) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "divsd", Iop_Div64F0x2 );
      goto decode_success;
   }
   /* 0F AE /5 = LFENCE -- flush pending operations to memory */
   /* 0F AE /6 = MFENCE -- flush pending operations to memory */
   if (insn[0] == 0x0F && insn[1] == 0xAE
       && epartIsReg(insn[2]) 
       && (gregOfRM(insn[2]) == 5 || gregOfRM(insn[2]) == 6)) {
      vassert(sz == 4);
      delta += 3;
      /* Insert a memory fence.  It's sometimes important that these
         are carried through to the generated code. */
      stmt( IRStmt_MBE(Imbe_Fence) );
      DIP("%sfence\n", gregOfRM(insn[2])==5 ? "l" : "m");
      goto decode_success;
   }
   /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5F) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxpd", Iop_Max64Fx2 );
      goto decode_success;
   }

   /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5F) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "maxsd", Iop_Max64F0x2 );
      goto decode_success;
   }

   /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5D) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "minpd", Iop_Min64Fx2 );
      goto decode_success;
   }

   /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5D) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "minsd", Iop_Min64F0x2 );
      goto decode_success;
   }
   /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
   /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
   /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
   if (sz == 2 && insn[0] == 0x0F 
       && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) {
      const HChar* wot = insn[1]==0x28 ? "apd" :
                         insn[1]==0x10 ? "upd" : "dqa";
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         putXMMReg( gregOfRM(modrm), 
                    getXMMReg( eregOfRM(modrm) ));
         DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
         delta += 2+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         if (insn[1] == 0x28/*movapd*/ || insn[1] == 0x6F/*movdqa*/)
            gen_SEGV_if_not_16_aligned( addr );
         putXMMReg( gregOfRM(modrm), 
                    loadLE(Ity_V128, mkexpr(addr)) );
         DIP("mov%s %s,%s\n", wot, dis_buf,
                                   nameXMMReg(gregOfRM(modrm)));
         delta += 2+alen;
      }
      goto decode_success;
   }
   /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
   /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
   if (sz == 2 && insn[0] == 0x0F 
       && (insn[1] == 0x29 || insn[1] == 0x11)) {
      const HChar* wot = insn[1]==0x29 ? "apd" : "upd";
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         /* fall through; awaiting test case */
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         if (insn[1] == 0x29/*movapd*/)
            gen_SEGV_if_not_16_aligned( addr );
         storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
         DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRM(modrm)),
                                   dis_buf );
         delta += 2+alen;
         goto decode_success;
      }
   }
   /* 66 0F 6E = MOVD from r/m32 to xmm, zeroing high 3/4 of xmm. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6E) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         delta += 2+1;
         putXMMReg(
            gregOfRM(modrm),
            unop( Iop_32UtoV128, getIReg(4, eregOfRM(modrm)) ) 
         );
         DIP("movd %s, %s\n", 
             nameIReg(4,eregOfRM(modrm)), nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         putXMMReg(
            gregOfRM(modrm),
            unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) ) 
         );
         DIP("movd %s, %s\n", dis_buf, nameXMMReg(gregOfRM(modrm)));
      }
      goto decode_success;
   }
   /* 66 0F 7E = MOVD from xmm low 1/4 to r/m32. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7E) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         delta += 2+1;
         putIReg( 4, eregOfRM(modrm),
                  getXMMRegLane32(gregOfRM(modrm), 0) );
         DIP("movd %s, %s\n", 
             nameXMMReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm)));
      } else {
         addr = disAMode( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         storeLE( mkexpr(addr),
                  getXMMRegLane32(gregOfRM(modrm), 0) );
         DIP("movd %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
      }
      goto decode_success;
   }
   /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7F) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         delta += 2+1;
         putXMMReg( eregOfRM(modrm),
                    getXMMReg(gregOfRM(modrm)) );
         DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), 
                                nameXMMReg(eregOfRM(modrm)));
      } else {
         addr = disAMode( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         gen_SEGV_if_not_16_aligned( addr );
         storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
         DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
      }
      goto decode_success;
   }
   /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
   /* Unfortunately can't simply use the MOVDQA case since the
      prefix lengths are different (66 vs F3) */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x6F) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         putXMMReg( gregOfRM(modrm), 
                    getXMMReg( eregOfRM(modrm) ));
         DIP("movdqu %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                               nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         putXMMReg( gregOfRM(modrm), 
                    loadLE(Ity_V128, mkexpr(addr)) );
         DIP("movdqu %s,%s\n", dis_buf,
                               nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }
      goto decode_success;
   }
   /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
   /* Unfortunately can't simply use the MOVDQA case since the
      prefix lengths are different (66 vs F3) */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7F) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         delta += 3+1;
         putXMMReg( eregOfRM(modrm),
                    getXMMReg(gregOfRM(modrm)) );
         DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), 
                                nameXMMReg(eregOfRM(modrm)));
      } else {
         addr = disAMode( &alen, sorb, delta+3, dis_buf );
         delta += 3+alen;
         storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
         DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
      }
      goto decode_success;
   }
   /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD6) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         do_MMX_preamble();
         putMMXReg( gregOfRM(modrm), 
                    getXMMRegLane64( eregOfRM(modrm), 0 ));
         DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                nameMMXReg(gregOfRM(modrm)));
         delta += 3+1;
         goto decode_success;
      } else {
         /* fall through, apparently no mem case for this insn */
      }
   }
   /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
   /* This seems identical to MOVHPS.  This instruction encoding is
      completely crazy. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x16) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         /* fall through; apparently reg-reg is not possible */
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movhpd %s,%s\n", dis_buf, 
                               nameXMMReg( gregOfRM(modrm) ));
         goto decode_success;
      }
   }
   /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
   /* Again, this seems identical to MOVHPS. */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x17) {
      if (!epartIsReg(insn[2])) {
         delta += 2;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         storeLE( mkexpr(addr), 
                  getXMMRegLane64( gregOfRM(insn[2]),
                                   1/*upper lane*/ ) );
         DIP("movhpd %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
                               dis_buf);
         goto decode_success;
      }
      /* else fall through */
   }
   /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
   /* Identical to MOVLPS ? */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x12) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         /* fall through; apparently reg-reg is not possible */
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         delta += 2+alen;
         putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movlpd %s, %s\n", 
             dis_buf, nameXMMReg( gregOfRM(modrm) ));
         goto decode_success;
      }
   }
   /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
   /* Identical to MOVLPS ? */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x13) {
      if (!epartIsReg(insn[2])) {
         delta += 2;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         storeLE( mkexpr(addr), 
                  getXMMRegLane64( gregOfRM(insn[2]),
                                   0/*lower lane*/ ) );
         DIP("movlpd %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
                                dis_buf);
         goto decode_success;
      }
      /* else fall through */
   }
   /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
      2 lowest bits of ireg(G) */
   if (insn[0] == 0x0F && insn[1] == 0x50) {
      modrm = getIByte(delta+2);
      if (sz == 2 && epartIsReg(modrm)) {
         Int src;
         t0 = newTemp(Ity_I32);
         t1 = newTemp(Ity_I32);
         delta += 2+1;
         src = eregOfRM(modrm);
         assign( t0, binop( Iop_And32,
                            binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)),
                            mkU32(1) ));
         assign( t1, binop( Iop_And32,
                            binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)),
                            mkU32(2) ));
         putIReg(4, gregOfRM(modrm),
                    binop(Iop_Or32, mkexpr(t0), mkexpr(t1))
                 );
         DIP("movmskpd %s,%s\n", nameXMMReg(src), 
                                 nameIReg(4, gregOfRM(modrm)));
         goto decode_success;
      }
      /* else fall through */
   }
   /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
   if (insn[0] == 0x0F && insn[1] == 0xF7) {
      modrm = getIByte(delta+2);
      if (sz == 2 && epartIsReg(modrm)) {
         IRTemp regD    = newTemp(Ity_V128);
         IRTemp mask    = newTemp(Ity_V128);
         IRTemp olddata = newTemp(Ity_V128);
         IRTemp newdata = newTemp(Ity_V128);
                addr    = newTemp(Ity_I32);

         assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
         assign( regD, getXMMReg( gregOfRM(modrm) ));

         /* Unfortunately can't do the obvious thing with SarN8x16
            here since that can't be re-emitted as SSE2 code - no such
            insn. */
         assign( 
            mask, 
            binop(Iop_64HLtoV128,
                  binop(Iop_SarN8x8, 
                        getXMMRegLane64( eregOfRM(modrm), 1 ), 
                        mkU8(7) ),
                  binop(Iop_SarN8x8, 
                        getXMMRegLane64( eregOfRM(modrm), 0 ), 
                        mkU8(7) ) ));
         assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
         assign( newdata, 
                 binop(Iop_OrV128, 
                       binop(Iop_AndV128, 
                             mkexpr(regD), 
                             mkexpr(mask) ),
                       binop(Iop_AndV128, 
                             mkexpr(olddata),
                             unop(Iop_NotV128, mkexpr(mask)))) );
         storeLE( mkexpr(addr), mkexpr(newdata) );

         delta += 2+1;

         DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRM(modrm) ),
                                   nameXMMReg( gregOfRM(modrm) ) );
         goto decode_success;
      }
      /* else fall through */
   }
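   /* Worked example for the mask construction above: Iop_SarN8x8
      arithmetic-shifts each byte right by 7, smearing its sign bit,
      so a selector byte 0x80..0xFF becomes 0xFF and 0x00..0x7F
      becomes 0x00.  The store then computes
         newdata = (regD & mask) | (olddata & ~mask)
      which updates only the selected bytes at (%edi), as a
      non-atomic read-modify-write of the whole 16 bytes. */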
   /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
   if (insn[0] == 0x0F && insn[1] == 0xE7) {
      modrm = getIByte(delta+2);
      if (sz == 2 && !epartIsReg(modrm)) {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
         DIP("movntdq %s,%s\n", dis_buf,
                                nameXMMReg(gregOfRM(modrm)));
         delta += 2+alen;
         goto decode_success;
      }
      /* else fall through */
   }
   /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
   if (insn[0] == 0x0F && insn[1] == 0xC3) {
      vassert(sz == 4);
      modrm = getIByte(delta+2);
      if (!epartIsReg(modrm)) {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         storeLE( mkexpr(addr), getIReg(4, gregOfRM(modrm)) );
         DIP("movnti %s,%s\n", dis_buf,
                               nameIReg(4, gregOfRM(modrm)));
         delta += 2+alen;
         goto decode_success;
      }
      /* else fall through */
   }
   /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
      or lo half xmm). */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD6) {
      modrm = getIByte(delta+2);
      if (epartIsReg(modrm)) {
         /* fall through, awaiting test case */
         /* dst: lo half copied, hi half zeroed */
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         storeLE( mkexpr(addr), 
                  getXMMRegLane64( gregOfRM(modrm), 0 ));
         DIP("movq %s,%s\n", nameXMMReg(gregOfRM(modrm)), dis_buf );
         delta += 2+alen;
         goto decode_success;
      }
   }
   /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
      hi half). */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xD6) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         do_MMX_preamble();
         putXMMReg( gregOfRM(modrm), 
                    unop(Iop_64UtoV128, getMMXReg( eregOfRM(modrm) )) );
         DIP("movq2dq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
         goto decode_success;
      } else {
         /* fall through, apparently no mem case for this insn */
      }
   }
   /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
      G (lo half xmm).  Upper half of G is zeroed out. */
   /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
      G (lo half xmm).  If E is mem, upper half of G is zeroed out.
      If E is reg, upper half of G is unchanged. */
   if ((insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x10)
       || (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7E)) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         putXMMRegLane64( gregOfRM(modrm), 0,
                          getXMMRegLane64( eregOfRM(modrm), 0 ));
         if (insn[0] == 0xF3/*MOVQ*/) {
            /* zero bits 127:64 */
            putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
         }
         DIP("movsd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                              nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         /* zero bits 127:64 */
         putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
         /* write bits 63:0 */
         putXMMRegLane64( gregOfRM(modrm), 0,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movsd %s,%s\n", dis_buf,
                              nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }
      goto decode_success;
   }
   /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
      or lo half xmm). */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x11) {
      vassert(sz == 4);
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         putXMMRegLane64( eregOfRM(modrm), 0,
                          getXMMRegLane64( gregOfRM(modrm), 0 ));
         DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
                              nameXMMReg(eregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         storeLE( mkexpr(addr),
                  getXMMRegLane64(gregOfRM(modrm), 0) );
         DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
                              dis_buf);
         delta += 3+alen;
      }
      goto decode_success;
   }
   /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x59) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulpd", Iop_Mul64Fx2 );
      goto decode_success;
   }

   /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x59) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "mulsd", Iop_Mul64F0x2 );
      goto decode_success;
   }

   /* 66 0F 56 = ORPD -- G = G or E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x56) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "orpd", Iop_OrV128 );
      goto decode_success;
   }
   /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC6) {
      Int    select;
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);
      IRTemp s1 = newTemp(Ity_I64);
      IRTemp s0 = newTemp(Ity_I64);
      IRTemp d1 = newTemp(Ity_I64);
      IRTemp d0 = newTemp(Ity_I64);

      modrm = insn[2];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         select = (Int)insn[3];
         delta += 2+2;
         DIP("shufpd $%d,%s,%s\n", select, 
                                   nameXMMReg(eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         select = (Int)insn[2+alen];
         delta += 3+alen;
         DIP("shufpd $%d,%s,%s\n", select, 
                                   dis_buf,
                                   nameXMMReg(gregOfRM(modrm)));
      }

      assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );

#     define SELD(n) mkexpr((n)==0 ? d0 : d1)
#     define SELS(n) mkexpr((n)==0 ? s0 : s1)

      putXMMReg(
         gregOfRM(modrm), 
         binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) )
      );

#     undef SELD
#     undef SELS

      goto decode_success;
   }
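   /* Worked example for the SHUFPD selector above: bit 0 of 'select'
      picks the low result half from dV and bit 1 picks the high
      result half from sV.  For instance select == 2 (binary 10)
      yields hi = s1, lo = d0, while select == 0 yields hi = s0,
      lo = d0. */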
   /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x51) {
      delta = dis_SSE_E_to_G_unary_all( sorb, delta+2, 
                                        "sqrtpd", Iop_Sqrt64Fx2 );
      goto decode_success;
   }

   /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x51) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_unary_lo64( sorb, delta+3, 
                                         "sqrtsd", Iop_Sqrt64F0x2 );
      goto decode_success;
   }

   /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5C) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "subpd", Iop_Sub64Fx2 );
      goto decode_success;
   }

   /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5C) {
      vassert(sz == 4);
      delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "subsd", Iop_Sub64F0x2 );
      goto decode_success;
   }
   /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
   /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
   /* These just appear to be special cases of SHUFPD */
   if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
      IRTemp s1 = newTemp(Ity_I64);
      IRTemp s0 = newTemp(Ity_I64);
      IRTemp d1 = newTemp(Ity_I64);
      IRTemp d0 = newTemp(Ity_I64);
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);
      Bool   hi = toBool(insn[1] == 0x15);

      modrm = insn[2];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
                                  nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
                                  dis_buf,
                                  nameXMMReg(gregOfRM(modrm)));
      }

      assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );

      if (hi) {
         putXMMReg( gregOfRM(modrm), 
                    binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
      } else {
         putXMMReg( gregOfRM(modrm), 
                    binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
      }

      goto decode_success;
   }
   /* 66 0F 57 = XORPD -- G = G xor E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x57) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorpd", Iop_XorV128 );
      goto decode_success;
   }
   /* 66 0F 6B = PACKSSDW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6B) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "packssdw",
                                 Iop_QNarrowBin32Sto16Sx8, True );
      goto decode_success;
   }

   /* 66 0F 63 = PACKSSWB */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x63) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "packsswb",
                                 Iop_QNarrowBin16Sto8Sx16, True );
      goto decode_success;
   }

   /* 66 0F 67 = PACKUSWB */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x67) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "packuswb",
                                 Iop_QNarrowBin16Sto8Ux16, True );
      goto decode_success;
   }
   /* 66 0F FC = PADDB */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFC) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "paddb", Iop_Add8x16, False );
      goto decode_success;
   }

   /* 66 0F FE = PADDD */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFE) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "paddd", Iop_Add32x4, False );
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
   /* 0F D4 = PADDQ -- add 64x1 */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD4) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg ( 
                sorb, delta+2, insn[1], "paddq", False );
      goto decode_success;
   }
   /* 66 0F D4 = PADDQ */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD4) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "paddq", Iop_Add64x2, False );
      goto decode_success;
   }

   /* 66 0F FD = PADDW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFD) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "paddw", Iop_Add16x8, False );
      goto decode_success;
   }

   /* 66 0F EC = PADDSB */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEC) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "paddsb", Iop_QAdd8Sx16, False );
      goto decode_success;
   }

   /* 66 0F ED = PADDSW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xED) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "paddsw", Iop_QAdd16Sx8, False );
      goto decode_success;
   }

   /* 66 0F DC = PADDUSB */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDC) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "paddusb", Iop_QAdd8Ux16, False );
      goto decode_success;
   }

   /* 66 0F DD = PADDUSW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDD) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "paddusw", Iop_QAdd16Ux8, False );
      goto decode_success;
   }
   /* 66 0F DB = PAND */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDB) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "pand", Iop_AndV128 );
      goto decode_success;
   }

   /* 66 0F DF = PANDN */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDF) {
      delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "pandn", Iop_AndV128 );
      goto decode_success;
   }

   /* 66 0F E0 = PAVGB */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE0) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pavgb", Iop_Avg8Ux16, False );
      goto decode_success;
   }

   /* 66 0F E3 = PAVGW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE3) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pavgw", Iop_Avg16Ux8, False );
      goto decode_success;
   }
   /* 66 0F 74 = PCMPEQB */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x74) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pcmpeqb", Iop_CmpEQ8x16, False );
      goto decode_success;
   }

   /* 66 0F 76 = PCMPEQD */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x76) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pcmpeqd", Iop_CmpEQ32x4, False );
      goto decode_success;
   }

   /* 66 0F 75 = PCMPEQW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x75) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pcmpeqw", Iop_CmpEQ16x8, False );
      goto decode_success;
   }

   /* 66 0F 64 = PCMPGTB */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x64) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pcmpgtb", Iop_CmpGT8Sx16, False );
      goto decode_success;
   }

   /* 66 0F 66 = PCMPGTD */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x66) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pcmpgtd", Iop_CmpGT32Sx4, False );
      goto decode_success;
   }

   /* 66 0F 65 = PCMPGTW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x65) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pcmpgtw", Iop_CmpGT16Sx8, False );
      goto decode_success;
   }
   /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put 
      zero-extend of it in ireg(G). */
   if (insn[0] == 0x0F && insn[1] == 0xC5) {
      modrm = insn[2];
      if (sz == 2 && epartIsReg(modrm)) {
         t5 = newTemp(Ity_V128);
         t4 = newTemp(Ity_I16);
         assign(t5, getXMMReg(eregOfRM(modrm)));
         breakup128to32s( t5, &t3, &t2, &t1, &t0 );
         switch (insn[3] & 7) {
            case 0:  assign(t4, unop(Iop_32to16,   mkexpr(t0))); break;
            case 1:  assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break;
            case 2:  assign(t4, unop(Iop_32to16,   mkexpr(t1))); break;
            case 3:  assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break;
            case 4:  assign(t4, unop(Iop_32to16,   mkexpr(t2))); break;
            case 5:  assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break;
            case 6:  assign(t4, unop(Iop_32to16,   mkexpr(t3))); break;
            case 7:  assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break;
            default: vassert(0); /*NOTREACHED*/
         }
         putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t4)));
         DIP("pextrw $%d,%s,%s\n",
             (Int)insn[3], nameXMMReg(eregOfRM(modrm)),
                           nameIReg(4,gregOfRM(modrm)));
         delta += 4;
         goto decode_success;
      } 
      /* else fall through */
   }
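
   /* For PEXTRW, (insn[3] & 7) picks one of the eight 16-bit lanes:
      even values take the low half of a 32-bit quarter, odd values
      the high half.  E.g. an immediate of 5 selects Iop_32HIto16
      applied to t2, i.e. bits 95:80 of the source xmm register. */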
   /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
      put it into the specified lane of xmm(G). */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC4) {
      Int lane;
      t4 = newTemp(Ity_I16);
      modrm = insn[2];

      if (epartIsReg(modrm)) {
         assign(t4, getIReg(2, eregOfRM(modrm)));
         delta += 3+1;
         lane = insn[3+1-1];
         DIP("pinsrw $%d,%s,%s\n", lane, 
                                   nameIReg(2,eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         delta += 3+alen;
         lane = insn[3+alen-1];
         assign(t4, loadLE(Ity_I16, mkexpr(addr)));
         DIP("pinsrw $%d,%s,%s\n", lane, 
                                   dis_buf,
                                   nameXMMReg(gregOfRM(modrm)));
      }

      putXMMRegLane16( gregOfRM(modrm), lane & 7, mkexpr(t4) );
      goto decode_success;
   }
   /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
      E(xmm or mem) to G(xmm) */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF5) {
      IRTemp s1V  = newTemp(Ity_V128);
      IRTemp s2V  = newTemp(Ity_V128);
      IRTemp dV   = newTemp(Ity_V128);
      IRTemp s1Hi = newTemp(Ity_I64);
      IRTemp s1Lo = newTemp(Ity_I64);
      IRTemp s2Hi = newTemp(Ity_I64);
      IRTemp s2Lo = newTemp(Ity_I64);
      IRTemp dHi  = newTemp(Ity_I64);
      IRTemp dLo  = newTemp(Ity_I64);
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( s1V, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("pmaddwd %s,%s\n", dis_buf,
                                nameXMMReg(gregOfRM(modrm)));
      }
      assign( s2V, getXMMReg(gregOfRM(modrm)) );
      assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
      assign( s1Lo, unop(Iop_V128to64,   mkexpr(s1V)) );
      assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
      assign( s2Lo, unop(Iop_V128to64,   mkexpr(s2V)) );
      assign( dHi, mkIRExprCCall(
                      Ity_I64, 0/*regparms*/,
                      "x86g_calculate_mmx_pmaddwd", 
                      &x86g_calculate_mmx_pmaddwd,
                      mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
                   ));
      assign( dLo, mkIRExprCCall(
                      Ity_I64, 0/*regparms*/,
                      "x86g_calculate_mmx_pmaddwd", 
                      &x86g_calculate_mmx_pmaddwd,
                      mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
                   ));
      assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
      putXMMReg(gregOfRM(modrm), mkexpr(dV));
      goto decode_success;
   }
   /* 66 0F EE = PMAXSW -- 16x8 signed max */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEE) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pmaxsw", Iop_Max16Sx8, False );
      goto decode_success;
   }

   /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDE) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pmaxub", Iop_Max8Ux16, False );
      goto decode_success;
   }

   /* 66 0F EA = PMINSW -- 16x8 signed min */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEA) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pminsw", Iop_Min16Sx8, False );
      goto decode_success;
   }

   /* 66 0F DA = PMINUB -- 8x16 unsigned min */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDA) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pminub", Iop_Min8Ux16, False );
      goto decode_success;
   }
   /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes
      in xmm(E), turn them into a byte, and put zero-extend of it in
      ireg(G). */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD7) {
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         t0 = newTemp(Ity_I64);
         t1 = newTemp(Ity_I64);
         assign(t0, getXMMRegLane64(eregOfRM(modrm), 0));
         assign(t1, getXMMRegLane64(eregOfRM(modrm), 1));
         t5 = newTemp(Ity_I32);
         assign(t5,
                unop(Iop_16Uto32,
                     binop(Iop_8HLto16,
                           unop(Iop_GetMSBs8x8, mkexpr(t1)),
                           unop(Iop_GetMSBs8x8, mkexpr(t0)))));
         putIReg(4, gregOfRM(modrm), mkexpr(t5));
         DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameIReg(4,gregOfRM(modrm)));
         delta += 3;
         goto decode_success;
      } 
      /* else fall through */
   }
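
   /* Note on the PMOVMSKB construction above: Iop_GetMSBs8x8 gathers
      the sign bit of each of 8 bytes into an 8-bit value, and
      Iop_8HLto16 pastes the two results together (t1's bits in the
      high byte), giving the 16-bit lane mask which is then
      zero-extended into the integer register. */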
   /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE4) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pmulhuw", Iop_MulHi16Ux8, False );
      goto decode_success;
   }

   /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE5) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pmulhw", Iop_MulHi16Sx8, False );
      goto decode_success;
   }

   /* 66 0F D5 = PMULLW -- 16x8 multiply */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD5) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "pmullw", Iop_Mul16x8, False );
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
   /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
      0 to form 64-bit result */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF4) {
      IRTemp sV = newTemp(Ity_I64);
      IRTemp dV = newTemp(Ity_I64);
      t1 = newTemp(Ity_I32);
      t0 = newTemp(Ity_I32);
      modrm = insn[2];

      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("pmuludq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 2+alen;
         DIP("pmuludq %s,%s\n", dis_buf,
                                nameMMXReg(gregOfRM(modrm)));
      }

      assign( t0, unop(Iop_64to32, mkexpr(dV)) );
      assign( t1, unop(Iop_64to32, mkexpr(sV)) );
      putMMXReg( gregOfRM(modrm), 
                 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
      goto decode_success;
   }
   /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
      0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
      half */
   /* This is a really poor translation -- could be improved if
      performance critical */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF4) {
      IRTemp sV, dV;
      IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
      sV = newTemp(Ity_V128);
      dV = newTemp(Ity_V128);
      s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
      t1 = newTemp(Ity_I64);
      t0 = newTemp(Ity_I64);
      modrm = insn[2];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("pmuludq %s,%s\n", dis_buf,
                                nameXMMReg(gregOfRM(modrm)));
      }

      breakup128to32s( dV, &d3, &d2, &d1, &d0 );
      breakup128to32s( sV, &s3, &s2, &s1, &s0 );

      assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) );
      putXMMRegLane64( gregOfRM(modrm), 0, mkexpr(t0) );
      assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) );
      putXMMRegLane64( gregOfRM(modrm), 1, mkexpr(t1) );
      goto decode_success;
   }
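
   /* Only lanes 0 and 2 of each operand take part in the XMM PMULUDQ
      above: Iop_MullU32 widens a 32x32 unsigned multiply to a 64-bit
      product, so d0*s0 fills result bits 63:0 and d2*s2 fills bits
      127:64, matching the architected behaviour. */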
   /* 66 0F EB = POR */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEB) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "por", Iop_OrV128 );
      goto decode_success;
   }
   /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
      from E(xmm or mem) to G(xmm) */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF6) {
      IRTemp s1V  = newTemp(Ity_V128);
      IRTemp s2V  = newTemp(Ity_V128);
      IRTemp dV   = newTemp(Ity_V128);
      IRTemp s1Hi = newTemp(Ity_I64);
      IRTemp s1Lo = newTemp(Ity_I64);
      IRTemp s2Hi = newTemp(Ity_I64);
      IRTemp s2Lo = newTemp(Ity_I64);
      IRTemp dHi  = newTemp(Ity_I64);
      IRTemp dLo  = newTemp(Ity_I64);
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( s1V, getXMMReg(eregOfRM(modrm)) );
         delta += 2+1;
         DIP("psadbw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                               nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 2+alen;
         DIP("psadbw %s,%s\n", dis_buf,
                               nameXMMReg(gregOfRM(modrm)));
      }
      assign( s2V, getXMMReg(gregOfRM(modrm)) );
      assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
      assign( s1Lo, unop(Iop_V128to64,   mkexpr(s1V)) );
      assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
      assign( s2Lo, unop(Iop_V128to64,   mkexpr(s2V)) );
      assign( dHi, mkIRExprCCall(
                      Ity_I64, 0/*regparms*/,
                      "x86g_calculate_mmx_psadbw", 
                      &x86g_calculate_mmx_psadbw,
                      mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
                   ));
      assign( dLo, mkIRExprCCall(
                      Ity_I64, 0/*regparms*/,
                      "x86g_calculate_mmx_psadbw", 
                      &x86g_calculate_mmx_psadbw,
                      mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
                   ));
      assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
      putXMMReg(gregOfRM(modrm), mkexpr(dV));
      goto decode_success;
   }
   /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x70) {
      Int order;
      IRTemp sV, dV, s3, s2, s1, s0;
      s3 = s2 = s1 = s0 = IRTemp_INVALID;
      sV = newTemp(Ity_V128);
      dV = newTemp(Ity_V128);
      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         order = (Int)insn[3];
         delta += 2+2;
         DIP("pshufd $%d,%s,%s\n", order, 
                                   nameXMMReg(eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         order = (Int)insn[2+alen];
         delta += 3+alen;
         DIP("pshufd $%d,%s,%s\n", order, 
                                   dis_buf,
                                   nameXMMReg(gregOfRM(modrm)));
      }
      breakup128to32s( sV, &s3, &s2, &s1, &s0 );

#     define SEL(n) \
                ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
      assign(dV,
             mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
                           SEL((order>>2)&3), SEL((order>>0)&3) )
      );
      putXMMReg(gregOfRM(modrm), mkexpr(dV));
#     undef SEL
      goto decode_success;
   }
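
   /* Worked example for the SEL scheme above: an order byte of 0x1B
      (binary 00 01 10 11) makes the result lanes, from high to low,
      SEL(0):SEL(1):SEL(2):SEL(3) = s0:s1:s2:s3 -- i.e. it reverses
      the four 32-bit lanes of the source. */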
   /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
      mem) to G(xmm), and copy lower half */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x70) {
      Int order;
      IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0;
      s3 = s2 = s1 = s0 = IRTemp_INVALID;
      sV   = newTemp(Ity_V128);
      dV   = newTemp(Ity_V128);
      sVhi = newTemp(Ity_I64);
      dVhi = newTemp(Ity_I64);
      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         order = (Int)insn[4];
         delta += 4+1;
         DIP("pshufhw $%d,%s,%s\n", order, 
                                    nameXMMReg(eregOfRM(modrm)),
                                    nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         order = (Int)insn[3+alen];
         delta += 4+alen;
         DIP("pshufhw $%d,%s,%s\n", order, 
                                    dis_buf,
                                    nameXMMReg(gregOfRM(modrm)));
      }
      assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) );
      breakup64to16s( sVhi, &s3, &s2, &s1, &s0 );

#     define SEL(n) \
                ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
      assign(dVhi,
             mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
                          SEL((order>>2)&3), SEL((order>>0)&3) )
      );
      assign(dV, binop( Iop_64HLtoV128, 
                        mkexpr(dVhi),
                        unop(Iop_V128to64, mkexpr(sV))) );
      putXMMReg(gregOfRM(modrm), mkexpr(dV));
#     undef SEL
      goto decode_success;
   }
   /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
      mem) to G(xmm), and copy upper half */
   if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x70) {
      Int order;
      IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0;
      s3 = s2 = s1 = s0 = IRTemp_INVALID;
      sV   = newTemp(Ity_V128);
      dV   = newTemp(Ity_V128);
      sVlo = newTemp(Ity_I64);
      dVlo = newTemp(Ity_I64);
      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         order = (Int)insn[4];
         delta += 4+1;
         DIP("pshuflw $%d,%s,%s\n", order, 
                                    nameXMMReg(eregOfRM(modrm)),
                                    nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         order = (Int)insn[3+alen];
         delta += 4+alen;
         DIP("pshuflw $%d,%s,%s\n", order, 
                                    dis_buf,
                                    nameXMMReg(gregOfRM(modrm)));
      }
      assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) );
      breakup64to16s( sVlo, &s3, &s2, &s1, &s0 );

#     define SEL(n) \
                ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
      assign(dVlo,
             mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
                          SEL((order>>2)&3), SEL((order>>0)&3) )
      );
      assign(dV, binop( Iop_64HLtoV128,
                        unop(Iop_V128HIto64, mkexpr(sV)),
                        mkexpr(dVlo) ) );
      putXMMReg(gregOfRM(modrm), mkexpr(dV));
#     undef SEL
      goto decode_success;
   }
   /* 66 0F 72 /6 ib = PSLLD by immediate */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 6) {
      delta = dis_SSE_shiftE_imm( delta+2, "pslld", Iop_ShlN32x4 );
      goto decode_success;
   }

   /* 66 0F F2 = PSLLD by E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF2) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "pslld", Iop_ShlN32x4 );
      goto decode_success;
   }
   /* 66 0F 73 /7 ib = PSLLDQ by immediate */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 7) {
      IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
      Int    imm = (Int)insn[3];
      Int    reg = eregOfRM(insn[2]);
      DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
      vassert(imm >= 0 && imm <= 255);
      delta += 4;

      sV    = newTemp(Ity_V128);
      dV    = newTemp(Ity_V128);
      hi64  = newTemp(Ity_I64);
      lo64  = newTemp(Ity_I64);
      hi64r = newTemp(Ity_I64);
      lo64r = newTemp(Ity_I64);

      if (imm >= 16) {
         putXMMReg(reg, mkV128(0x0000));
         goto decode_success;
      }

      assign( sV, getXMMReg(reg) );
      assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( lo64, unop(Iop_V128to64,   mkexpr(sV)) );

      if (imm == 0) {
         assign( lo64r, mkexpr(lo64) );
         assign( hi64r, mkexpr(hi64) );
      }
      else
      if (imm == 8) {
         assign( lo64r, mkU64(0) );
         assign( hi64r, mkexpr(lo64) );
      }
      else
      if (imm > 8) {
         assign( lo64r, mkU64(0) );
         assign( hi64r, binop( Iop_Shl64, 
                               mkexpr(lo64),
                               mkU8( 8*(imm-8) ) ));
      } else {
         assign( lo64r, binop( Iop_Shl64, 
                               mkexpr(lo64),
                               mkU8(8 * imm) ));
         assign( hi64r, 
                 binop( Iop_Or64,
                        binop(Iop_Shl64, mkexpr(hi64),
                                         mkU8(8 * imm)),
                        binop(Iop_Shr64, mkexpr(lo64),
                                         mkU8(8 * (8 - imm)) )
                      )
               );
      }
      assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
      putXMMReg(reg, mkexpr(dV));
      goto decode_success;
   }
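
   /* The PSLLDQ byte-shift above splits into cases because the xmm
      value is handled as two 64-bit halves.  For 1 <= imm <= 7 the
      new high half is (hi64 << 8*imm) | (lo64 >> 8*(8-imm)) and the
      new low half is lo64 << 8*imm; for imm == 8 the low half simply
      becomes the high half; for 9..15 only a shifted fragment of
      lo64 survives in the high half. */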
   /* 66 0F 73 /6 ib = PSLLQ by immediate */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 6) {
      delta = dis_SSE_shiftE_imm( delta+2, "psllq", Iop_ShlN64x2 );
      goto decode_success;
   }

   /* 66 0F F3 = PSLLQ by E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF3) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllq", Iop_ShlN64x2 );
      goto decode_success;
   }

   /* 66 0F 71 /6 ib = PSLLW by immediate */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 6) {
      delta = dis_SSE_shiftE_imm( delta+2, "psllw", Iop_ShlN16x8 );
      goto decode_success;
   }

   /* 66 0F F1 = PSLLW by E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF1) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllw", Iop_ShlN16x8 );
      goto decode_success;
   }

   /* 66 0F 72 /4 ib = PSRAD by immediate */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 4) {
      delta = dis_SSE_shiftE_imm( delta+2, "psrad", Iop_SarN32x4 );
      goto decode_success;
   }

   /* 66 0F E2 = PSRAD by E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE2) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrad", Iop_SarN32x4 );
      goto decode_success;
   }

   /* 66 0F 71 /4 ib = PSRAW by immediate */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 4) {
      delta = dis_SSE_shiftE_imm( delta+2, "psraw", Iop_SarN16x8 );
      goto decode_success;
   }

   /* 66 0F E1 = PSRAW by E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE1) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psraw", Iop_SarN16x8 );
      goto decode_success;
   }
   /* 66 0F 72 /2 ib = PSRLD by immediate */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 2) {
      delta = dis_SSE_shiftE_imm( delta+2, "psrld", Iop_ShrN32x4 );
      goto decode_success;
   }

   /* 66 0F D2 = PSRLD by E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD2) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrld", Iop_ShrN32x4 );
      goto decode_success;
   }
   /* 66 0F 73 /3 ib = PSRLDQ by immediate */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 3) {
      IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
      Int    imm = (Int)insn[3];
      Int    reg = eregOfRM(insn[2]);
      DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
      vassert(imm >= 0 && imm <= 255);
      delta += 4;

      sV    = newTemp(Ity_V128);
      dV    = newTemp(Ity_V128);
      hi64  = newTemp(Ity_I64);
      lo64  = newTemp(Ity_I64);
      hi64r = newTemp(Ity_I64);
      lo64r = newTemp(Ity_I64);

      if (imm >= 16) {
         putXMMReg(reg, mkV128(0x0000));
         goto decode_success;
      }

      assign( sV, getXMMReg(reg) );
      assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( lo64, unop(Iop_V128to64,   mkexpr(sV)) );

      if (imm == 0) {
         assign( lo64r, mkexpr(lo64) );
         assign( hi64r, mkexpr(hi64) );
      }
      else
      if (imm == 8) {
         assign( hi64r, mkU64(0) );
         assign( lo64r, mkexpr(hi64) );
      }
      else
      if (imm > 8) {
         assign( hi64r, mkU64(0) );
         assign( lo64r, binop( Iop_Shr64, 
                               mkexpr(hi64),
                               mkU8( 8*(imm-8) ) ));
      } else {
         assign( hi64r, binop( Iop_Shr64, 
                               mkexpr(hi64),
                               mkU8(8 * imm) ));
         assign( lo64r, 
                 binop( Iop_Or64,
                        binop(Iop_Shr64, mkexpr(lo64),
                                         mkU8(8 * imm)),
                        binop(Iop_Shl64, mkexpr(hi64),
                                         mkU8(8 * (8 - imm)) )
                      )
               );
      }

      assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
      putXMMReg(reg, mkexpr(dV));
      goto decode_success;
   }
   /* 66 0F 73 /2 ib = PSRLQ by immediate */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 2) {
      delta = dis_SSE_shiftE_imm( delta+2, "psrlq", Iop_ShrN64x2 );
      goto decode_success;
   }

   /* 66 0F D3 = PSRLQ by E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD3) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlq", Iop_ShrN64x2 );
      goto decode_success;
   }

   /* 66 0F 71 /2 ib = PSRLW by immediate */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
       && epartIsReg(insn[2])
       && gregOfRM(insn[2]) == 2) {
      delta = dis_SSE_shiftE_imm( delta+2, "psrlw", Iop_ShrN16x8 );
      goto decode_success;
   }

   /* 66 0F D1 = PSRLW by E */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD1) {
      delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlw", Iop_ShrN16x8 );
      goto decode_success;
   }
   /* 66 0F F8 = PSUBB */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF8) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "psubb", Iop_Sub8x16, False );
      goto decode_success;
   }

   /* 66 0F FA = PSUBD */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFA) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "psubd", Iop_Sub32x4, False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
   /* 0F FB = PSUBQ -- sub 64x1 */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xFB) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg ( 
                 sorb, delta+2, insn[1], "psubq", False );
      goto decode_success;
   }

   /* 66 0F FB = PSUBQ */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFB) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "psubq", Iop_Sub64x2, False );
      goto decode_success;
   }

   /* 66 0F F9 = PSUBW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF9) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "psubw", Iop_Sub16x8, False );
      goto decode_success;
   }

   /* 66 0F E8 = PSUBSB */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE8) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "psubsb", Iop_QSub8Sx16, False );
      goto decode_success;
   }

   /* 66 0F E9 = PSUBSW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE9) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "psubsw", Iop_QSub16Sx8, False );
      goto decode_success;
   }

   /* 66 0F D8 = PSUBUSB */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD8) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "psubusb", Iop_QSub8Ux16, False );
      goto decode_success;
   }

   /* 66 0F D9 = PSUBUSW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD9) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "psubusw", Iop_QSub16Ux8, False );
      goto decode_success;
   }
   /* 66 0F 68 = PUNPCKHBW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x68) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpckhbw",
                                 Iop_InterleaveHI8x16, True );
      goto decode_success;
   }

   /* 66 0F 6A = PUNPCKHDQ */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6A) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpckhdq",
                                 Iop_InterleaveHI32x4, True );
      goto decode_success;
   }

   /* 66 0F 6D = PUNPCKHQDQ */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6D) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpckhqdq",
                                 Iop_InterleaveHI64x2, True );
      goto decode_success;
   }

   /* 66 0F 69 = PUNPCKHWD */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x69) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpckhwd",
                                 Iop_InterleaveHI16x8, True );
      goto decode_success;
   }

   /* 66 0F 60 = PUNPCKLBW */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x60) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpcklbw",
                                 Iop_InterleaveLO8x16, True );
      goto decode_success;
   }

   /* 66 0F 62 = PUNPCKLDQ */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x62) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpckldq",
                                 Iop_InterleaveLO32x4, True );
      goto decode_success;
   }

   /* 66 0F 6C = PUNPCKLQDQ */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6C) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpcklqdq",
                                 Iop_InterleaveLO64x2, True );
      goto decode_success;
   }

   /* 66 0F 61 = PUNPCKLWD */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x61) {
      delta = dis_SSEint_E_to_G( sorb, delta+2, 
                                 "punpcklwd",
                                 Iop_InterleaveLO16x8, True );
      goto decode_success;
   }
   /* 66 0F EF = PXOR */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEF) {
      delta = dis_SSE_E_to_G_all( sorb, delta+2, "pxor", Iop_XorV128 );
      goto decode_success;
   }
//--    /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */
//--    if (insn[0] == 0x0F && insn[1] == 0xAE 
//--        && (!epartIsReg(insn[2]))
//--        && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) {
//--       Bool store = gregOfRM(insn[2]) == 0;
//--       vg_assert(sz == 4);
//--       pair = disAMode ( cb, sorb, eip+2, dis_buf );
//--       t1   = LOW24(pair);
//--       eip += 2+HI8(pair);
//--       uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512,
//--                   Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1],
//--                   Lit16, (UShort)insn[2],
//--                   TempReg, t1 );
//--       DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf );
//--       goto decode_success;
//--    }
   /* 0F AE /7 = CLFLUSH -- flush cache line */
   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
       && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {

      /* This is something of a hack.  We need to know the size of the
         cache line containing addr.  Since we don't (easily), assume
         256 on the basis that no real cache would have a line that
         big.  It's safe to invalidate more stuff than we need, just
         inefficient. */
      UInt lineszB = 256;

      addr = disAMode ( &alen, sorb, delta+2, dis_buf );
      delta += 2+alen;

      /* Round addr down to the start of the containing block. */
      stmt( IRStmt_Put(
               OFFB_CMSTART,
               binop( Iop_And32, 
                      mkexpr(addr), 
                      mkU32( ~(lineszB-1) ))) );

      stmt( IRStmt_Put(OFFB_CMLEN, mkU32(lineszB) ) );

      jmp_lit(&dres, Ijk_InvalICache, (Addr32)(guest_EIP_bbstart+delta));

      DIP("clflush %s\n", dis_buf);
      goto decode_success;
   }
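
   /* The CMSTART/CMLEN writes above describe the address range whose
      cached translations must be discarded, and the Ijk_InvalICache
      exit asks the dispatcher to do the discarding before resuming
      at the following instruction. */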
   /* ---------------------------------------------------- */
   /* --- end of the SSE2 decoder.                     --- */
   /* ---------------------------------------------------- */

   /* ---------------------------------------------------- */
   /* --- start of the SSE3 decoder.                   --- */
   /* ---------------------------------------------------- */

   /* Skip parts of the decoder which don't apply given the stated
      guest subarchitecture. */
   if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3))
      goto after_sse_decoders; /* no SSE3 capabilities */

   insn = &guest_code[delta];
   /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
      duplicating some lanes (2:2:0:0). */
   /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
      duplicating some lanes (3:3:1:1). */
   if (sz == 4 && insn[0] == 0xF3 && insn[1] == 0x0F 
       && (insn[2] == 0x12 || insn[2] == 0x16)) {
      IRTemp s3, s2, s1, s0;
      IRTemp sV  = newTemp(Ity_V128);
      Bool   isH = insn[2] == 0x16;
      s3 = s2 = s1 = s0 = IRTemp_INVALID;

      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg( eregOfRM(modrm)) );
         DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
                                  nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
                                  dis_buf,
                                  nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      breakup128to32s( sV, &s3, &s2, &s1, &s0 );
      putXMMReg( gregOfRM(modrm), 
                 isH ? mk128from32s( s3, s3, s1, s1 )
                     : mk128from32s( s2, s2, s0, s0 ) );
      goto decode_success;
   }
   /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
      duplicating some lanes (0:1:0:1). */
   if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x12) {
      IRTemp sV = newTemp(Ity_V128);
      IRTemp d0 = newTemp(Ity_I64);

      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg( eregOfRM(modrm)) );
         DIP("movddup %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
         assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movddup %s,%s\n", dis_buf,
                                nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      putXMMReg( gregOfRM(modrm), binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
      goto decode_success;
   }
   /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
   if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD0) {
      IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
      IRTemp eV   = newTemp(Ity_V128);
      IRTemp gV   = newTemp(Ity_V128);
      IRTemp addV = newTemp(Ity_V128);
      IRTemp subV = newTemp(Ity_V128);
      IRTemp rm   = newTemp(Ity_I32);
      a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( eV, getXMMReg( eregOfRM(modrm)) );
         DIP("addsubps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("addsubps %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
      assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );
      assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );

      breakup128to32s( addV, &a3, &a2, &a1, &a0 );
      breakup128to32s( subV, &s3, &s2, &s1, &s0 );

      putXMMReg( gregOfRM(modrm), mk128from32s( a3, s2, a1, s0 ));
      goto decode_success;
   }
   /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
   if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD0) {
      IRTemp eV   = newTemp(Ity_V128);
      IRTemp gV   = newTemp(Ity_V128);
      IRTemp addV = newTemp(Ity_V128);
      IRTemp subV = newTemp(Ity_V128);
      IRTemp a1   = newTemp(Ity_I64);
      IRTemp s0   = newTemp(Ity_I64);
      IRTemp rm   = newTemp(Ity_I32);

      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( eV, getXMMReg( eregOfRM(modrm)) );
         DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 2+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("addsubpd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
         delta += 2+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
      assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );
      assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );

      assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
      assign( s0, unop(Iop_V128to64,   mkexpr(subV) ));

      putXMMReg( gregOfRM(modrm), 
                 binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
      goto decode_success;
   }
   /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
   /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
   if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F 
       && (insn[2] == 0x7C || insn[2] == 0x7D)) {
      IRTemp e3, e2, e1, e0, g3, g2, g1, g0;
      IRTemp eV     = newTemp(Ity_V128);
      IRTemp gV     = newTemp(Ity_V128);
      IRTemp leftV  = newTemp(Ity_V128);
      IRTemp rightV = newTemp(Ity_V128);
      IRTemp rm     = newTemp(Ity_I32);
      Bool   isAdd  = insn[2] == 0x7C;
      const HChar* str = isAdd ? "add" : "sub";
      e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID;

      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign( eV, getXMMReg( eregOfRM(modrm)) );
         DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("h%sps %s,%s\n", str, dis_buf,
                                   nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      breakup128to32s( eV, &e3, &e2, &e1, &e0 );
      breakup128to32s( gV, &g3, &g2, &g1, &g0 );

      assign( leftV,  mk128from32s( e2, e0, g2, g0 ) );
      assign( rightV, mk128from32s( e3, e1, g3, g1 ) );

      assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
      putXMMReg( gregOfRM(modrm), 
                 triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
                       mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
      goto decode_success;
   }
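
   /* The leftV/rightV shuffle above lines the operands up so that a
      single vertical Add32Fx4/Sub32Fx4 produces the horizontal
      result: e.g. result lane 0 is g0+g1 (or g0-g1) and lane 2 is
      e0+e1 (or e0-e1), as the architecture requires. */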
   /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
   /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
   if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) {
      IRTemp e1     = newTemp(Ity_I64);
      IRTemp e0     = newTemp(Ity_I64);
      IRTemp g1     = newTemp(Ity_I64);
      IRTemp g0     = newTemp(Ity_I64);
      IRTemp eV     = newTemp(Ity_V128);
      IRTemp gV     = newTemp(Ity_V128);
      IRTemp leftV  = newTemp(Ity_V128);
      IRTemp rightV = newTemp(Ity_V128);
      IRTemp rm     = newTemp(Ity_I32);
      Bool   isAdd  = insn[1] == 0x7C;
      const HChar* str = isAdd ? "add" : "sub";

      modrm = insn[2];
      if (epartIsReg(modrm)) {
         assign( eV, getXMMReg( eregOfRM(modrm)) );
         DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                   nameXMMReg(gregOfRM(modrm)));
         delta += 2+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+2, dis_buf );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("h%spd %s,%s\n", str, dis_buf,
                                   nameXMMReg(gregOfRM(modrm)));
         delta += 2+alen;
      }

      assign( gV, getXMMReg(gregOfRM(modrm)) );

      assign( e1, unop(Iop_V128HIto64, mkexpr(eV) ));
      assign( e0, unop(Iop_V128to64,   mkexpr(eV) ));
      assign( g1, unop(Iop_V128HIto64, mkexpr(gV) ));
      assign( g0, unop(Iop_V128to64,   mkexpr(gV) ));

      assign( leftV,  binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) );
      assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) );

      assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
      putXMMReg( gregOfRM(modrm), 
                 triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
                       mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
      goto decode_success;
   }
   /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
   if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xF0) {
      modrm = getIByte(delta+3);
      if (epartIsReg(modrm)) {
         goto decode_failure;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         putXMMReg( gregOfRM(modrm), 
                    loadLE(Ity_V128, mkexpr(addr)) );
         DIP("lddqu %s,%s\n", dis_buf,
                              nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }
      goto decode_success;
   }

   /* ---------------------------------------------------- */
   /* --- end of the SSE3 decoder.                     --- */
   /* ---------------------------------------------------- */
   /* ---------------------------------------------------- */
   /* --- start of the SSSE3 decoder.                  --- */
   /* ---------------------------------------------------- */

   /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
      Unsigned Bytes (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
      IRTemp sV        = newTemp(Ity_I64);
      IRTemp dV        = newTemp(Ity_I64);
      IRTemp sVoddsSX  = newTemp(Ity_I64);
      IRTemp sVevensSX = newTemp(Ity_I64);
      IRTemp dVoddsZX  = newTemp(Ity_I64);
      IRTemp dVevensZX = newTemp(Ity_I64);

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pmaddubsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                  nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pmaddubsw %s,%s\n", dis_buf,
                                  nameMMXReg(gregOfRM(modrm)));
      }

      /* compute dV unsigned x sV signed */
      assign( sVoddsSX,
              binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
      assign( sVevensSX,
              binop(Iop_SarN16x4, 
                    binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)), 
                    mkU8(8)) );
      assign( dVoddsZX,
              binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
      assign( dVevensZX,
              binop(Iop_ShrN16x4,
                    binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
                    mkU8(8)) );

      putMMXReg(
         gregOfRM(modrm),
         binop(Iop_QAdd16Sx4,
               binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
               binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
         )
      );
      goto decode_success;
   }
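
   /* The shifts above are a lane-extraction trick: SarN16x4(sV,8)
      sign-extends the odd-numbered (high) byte of each 16-bit lane,
      while Shl-then-Sar does the same for the even-numbered byte;
      the Shr variants give the zero-extended equivalents for the
      unsigned operand.  The products then add pairwise with signed
      saturation (Iop_QAdd16Sx4), which is exactly PMADDUBSW. */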
   /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
      Unsigned Bytes (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
      IRTemp sV        = newTemp(Ity_V128);
      IRTemp dV        = newTemp(Ity_V128);
      IRTemp sVoddsSX  = newTemp(Ity_V128);
      IRTemp sVevensSX = newTemp(Ity_V128);
      IRTemp dVoddsZX  = newTemp(Ity_V128);
      IRTemp dVevensZX = newTemp(Ity_V128);

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pmaddubsw %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRM(modrm)));
      }

      /* compute dV unsigned x sV signed */
      assign( sVoddsSX,
              binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
      assign( sVevensSX,
              binop(Iop_SarN16x8, 
                    binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)), 
                    mkU8(8)) );
      assign( dVoddsZX,
              binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
      assign( dVevensZX,
              binop(Iop_ShrN16x8,
                    binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
                    mkU8(8)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_QAdd16Sx8,
               binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
               binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
         )
      );
      goto decode_success;
   }
   /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
   /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
      mmx) and G to G (mmx). */
   /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
      mmx) and G to G (mmx). */
   /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
      to G (mmx). */
   /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
      to G (mmx). */
   /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
      to G (mmx). */
   /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
      to G (mmx). */

   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x38 
       && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
           || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
      const HChar* str = "???";
      IROp   opV64  = Iop_INVALID;
      IROp   opCatO = Iop_CatOddLanes16x4;
      IROp   opCatE = Iop_CatEvenLanes16x4;
      IRTemp sV     = newTemp(Ity_I64);
      IRTemp dV     = newTemp(Ity_I64);

      modrm = insn[3];

      switch (insn[2]) {
         case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
         case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
         case 0x01: opV64 = Iop_Add16x4;   str = "addw";  break;
         case 0x05: opV64 = Iop_Sub16x4;   str = "subw";  break;
         case 0x02: opV64 = Iop_Add32x2;   str = "addd";  break;
         case 0x06: opV64 = Iop_Sub32x2;   str = "subd";  break;
         default: vassert(0);
      }
      if (insn[2] == 0x02 || insn[2] == 0x06) {
         opCatO = Iop_InterleaveHI32x2;
         opCatE = Iop_InterleaveLO32x2;
      }

      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("ph%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
                                  nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("ph%s %s,%s\n", str, dis_buf,
                                  nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         binop(opV64,
               binop(opCatE,mkexpr(sV),mkexpr(dV)),
               binop(opCatO,mkexpr(sV),mkexpr(dV))
         )
      );
      goto decode_success;
   }
   /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
      xmm) and G to G (xmm). */
   /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
      xmm) and G to G (xmm). */
   /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
      G to G (xmm). */
   /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
      G to G (xmm). */
   /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
      G to G (xmm). */
   /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
      G to G (xmm). */

   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38 
       && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
           || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
      const HChar* str = "???";
      IROp   opV64  = Iop_INVALID;
      IROp   opCatO = Iop_CatOddLanes16x4;
      IROp   opCatE = Iop_CatEvenLanes16x4;
      IRTemp sV     = newTemp(Ity_V128);
      IRTemp dV     = newTemp(Ity_V128);
      IRTemp sHi    = newTemp(Ity_I64);
      IRTemp sLo    = newTemp(Ity_I64);
      IRTemp dHi    = newTemp(Ity_I64);
      IRTemp dLo    = newTemp(Ity_I64);

      modrm = insn[3];

      switch (insn[2]) {
         case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
         case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
         case 0x01: opV64 = Iop_Add16x4;   str = "addw";  break;
         case 0x05: opV64 = Iop_Sub16x4;   str = "subw";  break;
         case 0x02: opV64 = Iop_Add32x2;   str = "addd";  break;
         case 0x06: opV64 = Iop_Sub32x2;   str = "subd";  break;
         default: vassert(0);
      }
      if (insn[2] == 0x02 || insn[2] == 0x06) {
         opCatO = Iop_InterleaveHI32x2;
         opCatE = Iop_InterleaveLO32x2;
      }

      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg( eregOfRM(modrm)) );
         DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                  nameXMMReg(gregOfRM(modrm)));
         delta += 3+1;
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("ph%s %s,%s\n", str, dis_buf,
                                  nameXMMReg(gregOfRM(modrm)));
         delta += 3+alen;
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      /* This isn't a particularly efficient way to compute the
         result, but at least it avoids a proliferation of IROps,
         hence avoids complicating all the backends. */
      putXMMReg(
         gregOfRM(modrm), 
         binop(Iop_64HLtoV128,
               binop(opV64,
                     binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
                     binop(opCatO,mkexpr(sHi),mkexpr(sLo))
               ),
               binop(opV64,
                     binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
                     binop(opCatO,mkexpr(dHi),mkexpr(dLo))
               )
         )
      );
      goto decode_success;
   }
   /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
      (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
      IRTemp sV = newTemp(Ity_I64);
      IRTemp dV = newTemp(Ity_I64);

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pmulhrsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                                 nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pmulhrsw %s,%s\n", dis_buf,
                                 nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
      );
      goto decode_success;
   }
   /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
      Scale (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
      IRTemp sV  = newTemp(Ity_V128);
      IRTemp dV  = newTemp(Ity_V128);
      IRTemp sHi = newTemp(Ity_I64);
      IRTemp sLo = newTemp(Ity_I64);
      IRTemp dHi = newTemp(Ity_I64);
      IRTemp dLo = newTemp(Ity_I64);

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                                 nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pmulhrsw %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRM(modrm)));
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
               dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
         )
      );
      goto decode_success;
   }
   /* 0F 38 08 = PSIGNB -- Packed Sign 8x8  (MMX) */
   /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
   /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x38 
       && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
      IRTemp sV = newTemp(Ity_I64);
      IRTemp dV = newTemp(Ity_I64);
      const HChar* str = "???";
      Int    laneszB = 0;

      switch (insn[2]) {
         case 0x08: laneszB = 1; str = "b"; break;
         case 0x09: laneszB = 2; str = "w"; break;
         case 0x0A: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("psign%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
                                     nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("psign%s %s,%s\n", str, dis_buf,
                                     nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
      );
      goto decode_success;
   }
   /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
   /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
   /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38 
       && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
      IRTemp sV  = newTemp(Ity_V128);
      IRTemp dV  = newTemp(Ity_V128);
      IRTemp sHi = newTemp(Ity_I64);
      IRTemp sLo = newTemp(Ity_I64);
      IRTemp dHi = newTemp(Ity_I64);
      IRTemp dLo = newTemp(Ity_I64);
      const HChar* str = "???";
      Int    laneszB = 0;

      switch (insn[2]) {
         case 0x08: laneszB = 1; str = "b"; break;
         case 0x09: laneszB = 2; str = "w"; break;
         case 0x0A: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                     nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("psign%s %s,%s\n", str, dis_buf,
                                     nameXMMReg(gregOfRM(modrm)));
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
               dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
         )
      );
      goto decode_success;
   }
   /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8  (MMX) */
   /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
   /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x38 
       && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
      IRTemp sV = newTemp(Ity_I64);
      const HChar* str = "???";
      Int    laneszB = 0;

      switch (insn[2]) {
         case 0x1C: laneszB = 1; str = "b"; break;
         case 0x1D: laneszB = 2; str = "w"; break;
         case 0x1E: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = insn[3];
      do_MMX_preamble();

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pabs%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
                                    nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pabs%s %s,%s\n", str, dis_buf,
                                    nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         dis_PABS_helper( mkexpr(sV), laneszB )
      );
      goto decode_success;
   }
   /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
   /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
   /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38 
       && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
      IRTemp sV  = newTemp(Ity_V128);
      IRTemp sHi = newTemp(Ity_I64);
      IRTemp sLo = newTemp(Ity_I64);
      const HChar* str = "???";
      Int    laneszB = 0;

      switch (insn[2]) {
         case 0x1C: laneszB = 1; str = "b"; break;
         case 0x1D: laneszB = 2; str = "w"; break;
         case 0x1E: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = insn[3];

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
                                    nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pabs%s %s,%s\n", str, dis_buf,
                                    nameXMMReg(gregOfRM(modrm)));
      }

      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128,
               dis_PABS_helper( mkexpr(sHi), laneszB ),
               dis_PABS_helper( mkexpr(sLo), laneszB )
         )
      );
      goto decode_success;
   }
   /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
      IRTemp sV  = newTemp(Ity_I64);
      IRTemp dV  = newTemp(Ity_I64);
      IRTemp res = newTemp(Ity_I64);

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         d32 = (UInt)insn[3+1];
         delta += 3+1+1;
         DIP("palignr $%u,%s,%s\n",  d32, 
                                     nameMMXReg(eregOfRM(modrm)),
                                     nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         d32 = (UInt)insn[3+alen];
         delta += 3+alen+1;
         DIP("palignr $%u,%s,%s\n", d32,
                                    dis_buf,
                                    nameMMXReg(gregOfRM(modrm)));
      }

      if (d32 == 0) {
         assign( res, mkexpr(sV) );
      }
      else if (d32 >= 1 && d32 <= 7) {
         assign(res, 
                binop(Iop_Or64,
                      binop(Iop_Shr64, mkexpr(sV), mkU8(8*d32)),
                      binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d32))
                     )));
      }
      else if (d32 == 8) {
         assign( res, mkexpr(dV) );
      }
      else if (d32 >= 9 && d32 <= 15) {
         assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d32-8))) );
      }
      else if (d32 >= 16 && d32 <= 255) {
         assign( res, mkU64(0) );
      }
      else
         vassert(0);

      putMMXReg( gregOfRM(modrm), mkexpr(res) );
      goto decode_success;
   }
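
   /* PALIGNR conceptually concatenates dV:sV into a 16-byte value
      and shifts it right by d32 bytes, keeping the low 8.  E.g. for
      d32 == 3 the result is (sV >> 24) | (dV << 40), which is the
      1..7 case above. */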
   /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
      IRTemp sV  = newTemp(Ity_V128);
      IRTemp dV  = newTemp(Ity_V128);
      IRTemp sHi = newTemp(Ity_I64);
      IRTemp sLo = newTemp(Ity_I64);
      IRTemp dHi = newTemp(Ity_I64);
      IRTemp dLo = newTemp(Ity_I64);
      IRTemp rHi = newTemp(Ity_I64);
      IRTemp rLo = newTemp(Ity_I64);

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         d32 = (UInt)insn[3+1];
         delta += 3+1+1;
         DIP("palignr $%u,%s,%s\n", d32,
                                    nameXMMReg(eregOfRM(modrm)),
                                    nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         d32 = (UInt)insn[3+alen];
         delta += 3+alen+1;
         DIP("palignr $%u,%s,%s\n", d32,
                                    dis_buf,
                                    nameXMMReg(gregOfRM(modrm)));
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      if (d32 == 0) {
         assign( rHi, mkexpr(sHi) );
         assign( rLo, mkexpr(sLo) );
      }
      else if (d32 >= 1 && d32 <= 7) {
         assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d32) );
         assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d32) );
      }
      else if (d32 == 8) {
         assign( rHi, mkexpr(dLo) );
         assign( rLo, mkexpr(sHi) );
      }
      else if (d32 >= 9 && d32 <= 15) {
         assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d32-8) );
         assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d32-8) );
      }
      else if (d32 == 16) {
         assign( rHi, mkexpr(dHi) );
         assign( rLo, mkexpr(dLo) );
      }
      else if (d32 >= 17 && d32 <= 23) {
         assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-16))) );
         assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d32-16) );
      }
      else if (d32 == 24) {
         assign( rHi, mkU64(0) );
         assign( rLo, mkexpr(dHi) );
      }
      else if (d32 >= 25 && d32 <= 31) {
         assign( rHi, mkU64(0) );
         assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-24))) );
      }
      else if (d32 >= 32 && d32 <= 255) {
         assign( rHi, mkU64(0) );
         assign( rLo, mkU64(0) );
      }
      else
         vassert(0);

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
      );
      goto decode_success;
   }
   /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
   if (sz == 4
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
      IRTemp sV = newTemp(Ity_I64);
      IRTemp dV = newTemp(Ity_I64);

      modrm = insn[3];
      do_MMX_preamble();
      assign( dV, getMMXReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pshufb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
                               nameMMXReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pshufb %s,%s\n", dis_buf,
                               nameMMXReg(gregOfRM(modrm)));
      }

      putMMXReg(
         gregOfRM(modrm),
         binop(
            Iop_And64,
            /* permute the lanes */
            binop(
               Iop_Perm8x8,
               mkexpr(dV),
               binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
            ),
            /* mask off lanes which have (index & 0x80) == 0x80 */
            unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
         )
      );
      goto decode_success;
   }
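
   /* In the MMX PSHUFB above, each source byte is an index: the low
      three bits pick one of the 8 lanes of dV (Iop_Perm8x8), and if
      bit 7 of the index is set, the SarN8x8/Not64 mask zeroes that
      result lane.  E.g. index byte 0x83 produces 0x00, while 0x03
      selects byte 3 of dV. */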
   /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
      IRTemp sV         = newTemp(Ity_V128);
      IRTemp dV         = newTemp(Ity_V128);
      IRTemp sHi        = newTemp(Ity_I64);
      IRTemp sLo        = newTemp(Ity_I64);
      IRTemp dHi        = newTemp(Ity_I64);
      IRTemp dLo        = newTemp(Ity_I64);
      IRTemp rHi        = newTemp(Ity_I64);
      IRTemp rLo        = newTemp(Ity_I64);
      IRTemp sevens     = newTemp(Ity_I64);
      IRTemp mask0x80hi = newTemp(Ity_I64);
      IRTemp mask0x80lo = newTemp(Ity_I64);
      IRTemp maskBit3hi = newTemp(Ity_I64);
      IRTemp maskBit3lo = newTemp(Ity_I64);
      IRTemp sAnd7hi    = newTemp(Ity_I64);
      IRTemp sAnd7lo    = newTemp(Ity_I64);
      IRTemp permdHi    = newTemp(Ity_I64);
      IRTemp permdLo    = newTemp(Ity_I64);

      modrm = insn[3];
      assign( dV, getXMMReg(gregOfRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRM(modrm)) );
         delta += 3+1;
         DIP("pshufb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
                               nameXMMReg(gregOfRM(modrm)));
      } else {
         addr = disAMode ( &alen, sorb, delta+3, dis_buf );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         delta += 3+alen;
         DIP("pshufb %s,%s\n", dis_buf,
                               nameXMMReg(gregOfRM(modrm)));
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      assign( sevens, mkU64(0x0707070707070707ULL) );

      /*
      mask0x80hi = Not(SarN8x8(sHi,7))
      maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
      sAnd7hi    = And(sHi,sevens)
      permdHi    = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
                       And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
      rHi        = And(permdHi,mask0x80hi)
      */
      assign(
         mask0x80hi,
         unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));

      assign(
         maskBit3hi,
         binop(Iop_SarN8x8,
               binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
               mkU8(7)));

      assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));

      assign(
         permdHi,
         binop(
            Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
                  mkexpr(maskBit3hi)),
            binop(Iop_And64,
                  binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
                  unop(Iop_Not64,mkexpr(maskBit3hi))) ));

      assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );

      /* And the same for the lower half of the result.  What fun. */

      assign(
         mask0x80lo,
         unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));

      assign(
         maskBit3lo,
         binop(Iop_SarN8x8,
               binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
               mkU8(7)));

      assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));

      assign(
         permdLo,
         binop(
            Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
                  mkexpr(maskBit3lo)),
            binop(Iop_And64,
                  binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
                  unop(Iop_Not64,mkexpr(maskBit3lo))) ));

      assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );

      putXMMReg(
         gregOfRM(modrm),
         binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
      );
      goto decode_success;
   }
   /* 0F 38 F0 = MOVBE m16/32(E), r16/32(G) */
   /* 0F 38 F1 = MOVBE r16/32(G), m16/32(E) */
   if ((sz == 2 || sz == 4)
       && insn[0] == 0x0F && insn[1] == 0x38
       && (insn[2] == 0xF0 || insn[2] == 0xF1)
       && !epartIsReg(insn[3])) {
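      /* Both forms are memory<->register moves that reverse byte
         order in transit; register-register forms don't exist, hence
         the !epartIsReg test above.  The swap itself is delegated to
         math_BSWAP, which returns a temp holding the byte-reversed
         value. */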
      modrm = insn[3];
      addr = disAMode(&alen, sorb, delta + 3, dis_buf);
      delta += 3 + alen;
      ty = szToITy(sz);
      IRTemp src = newTemp(ty);

      if (insn[2] == 0xF0) { /* LOAD */
         assign(src, loadLE(ty, mkexpr(addr)));
         IRTemp dst = math_BSWAP(src, ty);
         putIReg(sz, gregOfRM(modrm), mkexpr(dst));
         DIP("movbe %s,%s\n", dis_buf, nameIReg(sz, gregOfRM(modrm)));
      } else { /* STORE */
         assign(src, getIReg(sz, gregOfRM(modrm)));
         IRTemp dst = math_BSWAP(src, ty);
         storeLE(mkexpr(addr), mkexpr(dst));
         DIP("movbe %s,%s\n", nameIReg(sz, gregOfRM(modrm)), dis_buf);
      }
      goto decode_success;
   }
   /* ---------------------------------------------------- */
   /* --- end of the SSSE3 decoder.                    --- */
   /* ---------------------------------------------------- */

   /* ---------------------------------------------------- */
   /* --- start of the SSE4 decoder                    --- */
   /* ---------------------------------------------------- */

   /* 66 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8
      Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */
   if ( sz == 2
        && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x22 ) {

      Int    imm8_10;
      UShort mask = 0;
      IRTemp src_elems = newTemp(Ity_I32);
      IRTemp src_vec   = newTemp(Ity_V128);
      IRTemp z32       = newTemp(Ity_I32);

      modrm = insn[3];

      if ( epartIsReg( modrm ) ) {
         imm8_10 = (Int)(insn[3+1] & 3);
         assign( src_elems, getIReg( 4, eregOfRM(modrm) ) );
         delta += 3+1+1;
         DIP( "pinsrd $%d, %s,%s\n", imm8_10,
              nameIReg( 4, eregOfRM(modrm) ),
              nameXMMReg( gregOfRM(modrm) ) );
      } else {
         addr = disAMode( &alen, sorb, delta+3, dis_buf );
         imm8_10 = (Int)(insn[3+alen] & 3);
         assign( src_elems, loadLE( Ity_I32, mkexpr(addr) ) );
         delta += 3+alen+1;
         DIP( "pinsrd $%d, %s,%s\n",
              imm8_10, dis_buf, nameXMMReg( gregOfRM(modrm) ) );
      }

      assign(z32, mkU32(0));
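      /* mkV128 takes a 16-bit immediate with one bit per byte lane of
         the 128-bit result, so each mask below keeps the twelve bytes
         of the destination that PINSRD must preserve and clears the
         four being replaced.  The cases run 3..0 because mk128from32s
         lists its arguments from the highest 32-bit lane downwards. */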
      switch (imm8_10) {
         case 3:  mask = 0x0FFF;
                  assign(src_vec, mk128from32s(src_elems, z32, z32, z32));
                  break;
         case 2:  mask = 0xF0FF;
                  assign(src_vec, mk128from32s(z32, src_elems, z32, z32));
                  break;
         case 1:  mask = 0xFF0F;
                  assign(src_vec, mk128from32s(z32, z32, src_elems, z32));
                  break;
         case 0:  mask = 0xFFF0;
                  assign(src_vec, mk128from32s(z32, z32, z32, src_elems));
                  break;
         default: vassert(0);
      }

      putXMMReg( gregOfRM(modrm),
                 binop( Iop_OrV128, mkexpr(src_vec),
                        binop( Iop_AndV128,
                               getXMMReg( gregOfRM(modrm) ),
                               mkV128(mask) ) ) );

      goto decode_success;
   }
   /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
      (Partial implementation only -- only deal with cases where
      the rounding mode is specified directly by the immediate byte.)
      66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
      (Limitations ditto)
   */
   if (sz == 2
       && insn[0] == 0x0F && insn[1] == 0x3A
       && (insn[2] == 0x0B || insn[2] == 0x0A)) {

      Bool   isD = insn[2] == 0x0B;
      IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
      IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
      Int    imm = 0;
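      /* The "(imm & ~3) => decode_failure" tests below are what makes
         this implementation partial: bit 2 of the immediate, when
         set, asks for the rounding mode to come from MXCSR rather
         than from the immediate itself, and (it appears) that case,
         along with the precision-exception control bit, is simply
         not handled here. */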
      modrm = insn[3];

      if (epartIsReg(modrm)) {
         assign( src,
                 isD ? getXMMRegLane64F( eregOfRM(modrm), 0 )
                     : getXMMRegLane32F( eregOfRM(modrm), 0 ) );
         imm = insn[3+1];
         if (imm & ~3) goto decode_failure;
         delta += 3+1+1;
         DIP( "rounds%c $%d,%s,%s\n",
              isD ? 'd' : 's',
              imm, nameXMMReg( eregOfRM(modrm) ),
              nameXMMReg( gregOfRM(modrm) ) );
      } else {
         addr = disAMode( &alen, sorb, delta+3, dis_buf );
         assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
         imm = insn[3+alen];
         if (imm & ~3) goto decode_failure;
         delta += 3+alen+1;
         DIP( "roundsd $%d,%s,%s\n",
              imm, dis_buf, nameXMMReg( gregOfRM(modrm) ) );
      }

      /* (imm & 3) contains an Intel-encoded rounding mode.  Because
         that encoding is the same as the encoding for IRRoundingMode,
         we can use that value directly in the IR as a rounding
         mode. */
      assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
                        mkU32(imm & 3), mkexpr(src)) );

      if (isD)
         putXMMRegLane64F( gregOfRM(modrm), 0, mkexpr(res) );
      else
         putXMMRegLane32F( gregOfRM(modrm), 0, mkexpr(res) );

      goto decode_success;
   }
   /* F3 0F BD -- LZCNT (count leading zeroes.  An AMD extension,
      which we can only decode if we're sure this is an AMD cpu that
      supports LZCNT, since otherwise it's BSR, which behaves
      differently.) */
   if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xBD
       && 0 != (archinfo->hwcaps & VEX_HWCAPS_X86_LZCNT)) {
      vassert(sz == 2 || sz == 4);
      /*IRType*/ ty  = szToITy(sz);
      IRTemp     src = newTemp(ty);
      modrm = insn[3];
      if (epartIsReg(modrm)) {
         assign(src, getIReg(sz, eregOfRM(modrm)));
         delta += 3+1;
         DIP("lzcnt%c %s, %s\n", nameISize(sz),
             nameIReg(sz, eregOfRM(modrm)),
             nameIReg(sz, gregOfRM(modrm)));
      } else {
         addr = disAMode( &alen, sorb, delta+3, dis_buf );
         assign(src, loadLE(ty, mkexpr(addr)));
         delta += 3+alen;
         DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
             nameIReg(sz, gregOfRM(modrm)));
      }

      IRTemp res = gen_LZCNT(ty, src);
      putIReg(sz, gregOfRM(modrm), mkexpr(res));

      // Update flags.  This is pretty lame .. perhaps can do better
      // if this turns out to be performance critical.
      // O S A P are cleared.  Z is set if RESULT == 0.
      // C is set if SRC is zero.
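      // Widening both values to 32 bits first means the CmpEQ32s
      // below work for both the 16- and 32-bit operand sizes, and the
      // resulting OSZACP word can be dropped straight into the
      // CC_DEP1 slot of a COPY-style flags thunk.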
      IRTemp src32 = newTemp(Ity_I32);
      IRTemp res32 = newTemp(Ity_I32);
      assign(src32, widenUto32(mkexpr(src)));
      assign(res32, widenUto32(mkexpr(res)));

      IRTemp oszacp = newTemp(Ity_I32);
      assign(
         oszacp,
         binop(Iop_Or32,
               binop(Iop_Shl32,
                     unop(Iop_1Uto32,
                          binop(Iop_CmpEQ32, mkexpr(res32), mkU32(0))),
                     mkU8(X86G_CC_SHIFT_Z)),
               binop(Iop_Shl32,
                     unop(Iop_1Uto32,
                          binop(Iop_CmpEQ32, mkexpr(src32), mkU32(0))),
                     mkU8(X86G_CC_SHIFT_C))
         )
      );

      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));

      goto decode_success;
   }
   /* ---------------------------------------------------- */
   /* --- end of the SSE4 decoder                      --- */
   /* ---------------------------------------------------- */

   after_sse_decoders:

   /* ---------------------------------------------------- */
   /* --- deal with misc 0x67 pfxs (addr size override) -- */
   /* ---------------------------------------------------- */

   /* 67 E3 = JCXZ (for JECXZ see below) */
   if (insn[0] == 0x67 && insn[1] == 0xE3 && sz == 4) {
      delta += 2;
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta ++;
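      /* The 0x67 prefix makes the counter register CX rather than
         ECX, which is why the exit condition below compares only the
         low 16 bits against zero. */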
      stmt( IRStmt_Exit(
               binop(Iop_CmpEQ16, getIReg(2,R_ECX), mkU16(0)),
               Ijk_Boring,
               IRConst_U32(d32),
               OFFB_EIP
            ));
      DIP("jcxz 0x%x\n", d32);
      goto decode_success;
   }
   /* 67 E8 = CALL with redundant addr16 prefix */
   if (insn[0] == 0x67 && insn[1] == 0xE8) {
      delta++;
   }

   /* ---------------------------------------------------- */
   /* --- start of the baseline insn decoder            -- */
   /* ---------------------------------------------------- */

   /* Get the primary opcode. */
   opc = getIByte(delta); delta++;

   /* We get here if the current insn isn't SSE, or this CPU doesn't
      support SSE. */

   switch (opc) {

   /* ------------------------ Control flow --------------- */

   case 0xC2: /* RET imm16 */
      d32 = getUDisp16(delta);
      delta += 2;
      dis_ret(&dres, d32);
      DIP("ret %u\n", d32);
      break;
   case 0xC3: /* RET */
      dis_ret(&dres, 0);
      DIP("ret\n");
      break;

   case 0xCF: /* IRET */
      /* Note, this is an extremely kludgey and limited implementation
         of iret.  All it really does is:
            popl %EIP; popl %CS; popl %EFLAGS.
         %CS is set but ignored (as it is in (eg) popw %cs). */
      t1 = newTemp(Ity_I32); /* ESP */
      t2 = newTemp(Ity_I32); /* new EIP */
      t3 = newTemp(Ity_I32); /* new CS */
      t4 = newTemp(Ity_I32); /* new EFLAGS */
      assign(t1, getIReg(4,R_ESP));
      assign(t2, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(0) )));
      assign(t3, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(4) )));
      assign(t4, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(8) )));
      /* Get stuff off stack */
      putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(12)));
      /* set %CS (which is ignored anyway) */
      putSReg( R_CS, unop(Iop_32to16, mkexpr(t3)) );
      /* set %EFLAGS */
      set_EFLAGS_from_value( t4, False/*!emit_AC_emwarn*/, 0/*unused*/ );
      /* goto new EIP value */
      jmp_treg(&dres, Ijk_Ret, t2);
      vassert(dres.whatNext == Dis_StopHere);
      DIP("iret (very kludgey)\n");
      break;

   case 0xE8: /* CALL J4 */
      d32 = getUDisp32(delta); delta += 4;
      d32 += (guest_EIP_bbstart+delta);
      /* (guest_eip_bbstart+delta) == return-to addr, d32 == call-to addr */
      if (d32 == guest_EIP_bbstart+delta && getIByte(delta) >= 0x58
                                         && getIByte(delta) <= 0x5F) {
         /* Specially treat the position-independent-code idiom
                 call X
              X: popl %reg
            as
                 movl %eip, %reg.
            since this generates better code, but for no other reason. */
         Int archReg = getIByte(delta) - 0x58;
         /* vex_printf("-- fPIC thingy\n"); */
         putIReg(4, archReg, mkU32(guest_EIP_bbstart+delta));
         delta++; /* Step over the POP */
         DIP("call 0x%x ; popl %s\n",d32,nameIReg(4,archReg));
      } else {
         /* The normal sequence for a call. */
         t1 = newTemp(Ity_I32);
         assign(t1, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
         putIReg(4, R_ESP, mkexpr(t1));
         storeLE( mkexpr(t1), mkU32(guest_EIP_bbstart+delta));
         jmp_lit(&dres, Ijk_Call, d32);
         vassert(dres.whatNext == Dis_StopHere);
         DIP("call 0x%x\n",d32);
      }
      break;

//--    case 0xC8: /* ENTER */
//--       d32 = getUDisp16(eip); eip += 2;
//--       abyte = getIByte(delta); delta++;
//--
//--       vg_assert(sz == 4);
//--       vg_assert(abyte == 0);
//--
//--       t1 = newTemp(cb); t2 = newTemp(cb);
//--       uInstr2(cb, GET,   sz, ArchReg, R_EBP, TempReg, t1);
//--       uInstr2(cb, GET,    4, ArchReg, R_ESP, TempReg, t2);
//--       uInstr2(cb, SUB,    4, Literal, 0,     TempReg, t2);
//--       uLiteral(cb, sz);
//--       uInstr2(cb, PUT,    4, TempReg, t2,    ArchReg, R_ESP);
//--       uInstr2(cb, STORE,  4, TempReg, t1,    TempReg, t2);
//--       uInstr2(cb, PUT,    4, TempReg, t2,    ArchReg, R_EBP);
//--       uInstr2(cb, SUB,    4, Literal, 0,     TempReg, t2);
//--       uLiteral(cb, d32);
//--       uInstr2(cb, PUT,    4, TempReg, t2,    ArchReg, R_ESP);
//--
//--       DIP("enter 0x%x, 0x%x", d32, abyte);
//--       break;

   case 0xC9: /* LEAVE */
      vassert(sz == 4);
      t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32);
      assign(t1, getIReg(4,R_EBP));
      /* First PUT ESP looks redundant, but need it because ESP must
         always be up-to-date for Memcheck to work... */
      putIReg(4, R_ESP, mkexpr(t1));
      assign(t2, loadLE(Ity_I32,mkexpr(t1)));
      putIReg(4, R_EBP, mkexpr(t2));
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(4)) );
      DIP("leave\n");
      break;
   /* ---------------- Misc weird-ass insns --------------- */

   case 0x27: /* DAA */
   case 0x2F: /* DAS */
   case 0x37: /* AAA */
   case 0x3F: /* AAS */
      /* An ugly implementation for some ugly instructions.  Oh
         well. */
      if (sz != 4) goto decode_failure;
      t1 = newTemp(Ity_I32);
      t2 = newTemp(Ity_I32);
      /* Make up a 32-bit value (t1), with the old value of AX in the
         bottom 16 bits, and the old OSZACP bitmask in the upper 16
         bits. */
      assign(t1,
             binop(Iop_16HLto32,
                   unop(Iop_32to16,
                        mk_x86g_calculate_eflags_all()),
                   getIReg(2, R_EAX)
            ));
      /* Call the helper fn, to get a new AX and OSZACP value, and
         poke both back into the guest state.  Also pass the helper
         the actual opcode so it knows which of the 4 instructions it
         is doing the computation for. */
      vassert(opc == 0x27 || opc == 0x2F || opc == 0x37 || opc == 0x3F);
      assign(t2,
             mkIRExprCCall(
                Ity_I32, 0/*regparm*/, "x86g_calculate_daa_das_aaa_aas",
                &x86g_calculate_daa_das_aaa_aas,
                mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
             ));
      putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));
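      /* The helper hands its result back in the same packed layout it
         received: new AX in bits 15:0 and the new OSZACP flag group
         in bits 31:16.  Hence the Shr32 by 16 below to peel the flags
         off before masking them into a COPY-style thunk. */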
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1,
                        binop(Iop_And32,
                              binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
                              mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
                                     | X86G_CC_MASK_A | X86G_CC_MASK_Z
                                     | X86G_CC_MASK_S | X86G_CC_MASK_O )
                        )
          ));
      /* Set NDEP even though it isn't used.  This makes redundant-PUT
         elimination of previous stores to this field work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
      switch (opc) {
         case 0x27: DIP("daa\n"); break;
         case 0x2F: DIP("das\n"); break;
         case 0x37: DIP("aaa\n"); break;
         case 0x3F: DIP("aas\n"); break;
         default: vassert(0);
      }
      break;
   case 0xD4: /* AAM */
   case 0xD5: /* AAD */
      d32 = getIByte(delta); delta++;
      if (sz != 4 || d32 != 10) goto decode_failure;
      t1 = newTemp(Ity_I32);
      t2 = newTemp(Ity_I32);
      /* Make up a 32-bit value (t1), with the old value of AX in the
         bottom 16 bits, and the old OSZACP bitmask in the upper 16
         bits. */
      assign(t1,
             binop(Iop_16HLto32,
                   unop(Iop_32to16,
                        mk_x86g_calculate_eflags_all()),
                   getIReg(2, R_EAX)
            ));
      /* Call the helper fn, to get a new AX and OSZACP value, and
         poke both back into the guest state.  Also pass the helper
         the actual opcode so it knows which of the 2 instructions it
         is doing the computation for. */
      assign(t2,
             mkIRExprCCall(
                Ity_I32, 0/*regparm*/, "x86g_calculate_aad_aam",
                &x86g_calculate_aad_aam,
                mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
             ));
      putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));

      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1,
                        binop(Iop_And32,
                              binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
                              mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
                                     | X86G_CC_MASK_A | X86G_CC_MASK_Z
                                     | X86G_CC_MASK_S | X86G_CC_MASK_O )
                        )
          ));
      /* Set NDEP even though it isn't used.  This makes
         redundant-PUT elimination of previous stores to this field
         work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

      DIP(opc == 0xD4 ? "aam\n" : "aad\n");
      break;
   /* ------------------------ CWD/CDQ -------------------- */

   case 0x98: /* CBW */
      if (sz == 4) {
         putIReg(4, R_EAX, unop(Iop_16Sto32, getIReg(2, R_EAX)));
         DIP("cwde\n");
      } else {
         vassert(sz == 2);
         putIReg(2, R_EAX, unop(Iop_8Sto16, getIReg(1, R_EAX)));
         DIP("cbw\n");
      }
      break;

   case 0x99: /* CWD/CDQ */
      ty = szToITy(sz);
      putIReg(sz, R_EDX,
              binop(mkSizedOp(ty,Iop_Sar8),
                    getIReg(sz, R_EAX),
                    mkU8(sz == 2 ? 15 : 31)) );
      DIP(sz == 2 ? "cwd\n" : "cdq\n");
      break;

   /* ------------------------ FPU ops -------------------- */

   case 0x9E: /* SAHF */
      codegen_SAHF();
      DIP("sahf\n");
      break;

   case 0x9F: /* LAHF */
      codegen_LAHF();
      DIP("lahf\n");
      break;

   case 0x9B: /* FWAIT */
      /* ignore? */
      DIP("fwait\n");
      break;

   case 0xD8:
   case 0xD9:
   case 0xDA:
   case 0xDB:
   case 0xDC:
   case 0xDD:
   case 0xDE:
   case 0xDF: {
      Int  delta0    = delta;
      Bool decode_OK = False;
      delta = dis_FPU ( &decode_OK, sorb, delta );
      if (!decode_OK) {
         delta = delta0;
         goto decode_failure;
      }
      break;
   }
   /* ------------------------ INC & DEC ------------------ */

   case 0x40: /* INC eAX */
   case 0x41: /* INC eCX */
   case 0x42: /* INC eDX */
   case 0x43: /* INC eBX */
   case 0x44: /* INC eSP */
   case 0x45: /* INC eBP */
   case 0x46: /* INC eSI */
   case 0x47: /* INC eDI */
      vassert(sz == 2 || sz == 4);
      ty = szToITy(sz);
      t1 = newTemp(ty);
      assign( t1, binop(mkSizedOp(ty,Iop_Add8),
                        getIReg(sz, (UInt)(opc - 0x40)),
                        mkU(ty,1)) );
      setFlags_INC_DEC( True, t1, ty );
      putIReg(sz, (UInt)(opc - 0x40), mkexpr(t1));
      DIP("inc%c %s\n", nameISize(sz), nameIReg(sz,opc-0x40));
      break;

   case 0x48: /* DEC eAX */
   case 0x49: /* DEC eCX */
   case 0x4A: /* DEC eDX */
   case 0x4B: /* DEC eBX */
   case 0x4C: /* DEC eSP */
   case 0x4D: /* DEC eBP */
   case 0x4E: /* DEC eSI */
   case 0x4F: /* DEC eDI */
      vassert(sz == 2 || sz == 4);
      ty = szToITy(sz);
      t1 = newTemp(ty);
      assign( t1, binop(mkSizedOp(ty,Iop_Sub8),
                        getIReg(sz, (UInt)(opc - 0x48)),
                        mkU(ty,1)) );
      setFlags_INC_DEC( False, t1, ty );
      putIReg(sz, (UInt)(opc - 0x48), mkexpr(t1));
      DIP("dec%c %s\n", nameISize(sz), nameIReg(sz,opc-0x48));
      break;
   /* ------------------------ INT ------------------------ */

   case 0xCC: /* INT 3 */
      jmp_lit(&dres, Ijk_SigTRAP, ((Addr32)guest_EIP_bbstart)+delta);
      vassert(dres.whatNext == Dis_StopHere);
      DIP("int $0x3\n");
      break;

   case 0xCD: /* INT imm8 */
      d32 = getIByte(delta); delta++;

      /* For any of the cases where we emit a jump (that is, for all
         currently handled cases), it's important that all ArchRegs
         carry their up-to-date value at this point.  So we declare an
         end-of-block here, which forces any TempRegs caching ArchRegs
         to be flushed. */

      /* Handle int $0x3F .. $0x4F by synthesising a segfault and a
         restart of this instruction (hence the "-2" two lines below,
         to get the restart EIP to be this instruction.  This is
         probably Linux-specific and it would be more correct to only
         do this if the VexAbiInfo says that is what we should do.
         This used to handle just 0x40-0x43; Jikes RVM uses a larger
         range (0x3F-0x49), and this allows some slack as well. */
      if (d32 >= 0x3F && d32 <= 0x4F) {
         jmp_lit(&dres, Ijk_SigSEGV, ((Addr32)guest_EIP_bbstart)+delta-2);
         vassert(dres.whatNext == Dis_StopHere);
         DIP("int $0x%x\n", d32);
         break;
      }

      /* Handle int $0x80 (linux syscalls), int $0x81 and $0x82
         (darwin syscalls), int $0x91 (Solaris syscalls) and int $0xD2
         (Solaris fasttrap syscalls).  As part of this, note where we are, so we
         can back up the guest to this point if the syscall needs to
         be restarted. */
      IRJumpKind jump_kind;
      switch (d32) {
         case 0x80:
            jump_kind = Ijk_Sys_int128;
            break;
         case 0x81:
            jump_kind = Ijk_Sys_int129;
            break;
         case 0x82:
            jump_kind = Ijk_Sys_int130;
            break;
         case 0x91:
            jump_kind = Ijk_Sys_int145;
            break;
         case 0xD2:
            jump_kind = Ijk_Sys_int210;
            break;
         default:
            /* none of the above */
            goto decode_failure;
      }
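      /* The chosen Ijk_Sys_intNNN value records which software
         interrupt vector fired; IP_AT_SYSCALL (set just below) is
         what lets the scheduler back the guest up to this point and
         restart the syscall if it needs to. */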
      stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                        mkU32(guest_EIP_curr_instr) ) );
      jmp_lit(&dres, jump_kind, ((Addr32)guest_EIP_bbstart)+delta);
      vassert(dres.whatNext == Dis_StopHere);
      DIP("int $0x%x\n", d32);
      break;
   /* ------------------------ Jcond, byte offset --------- */

   case 0xEB: /* Jb (jump, byte offset) */
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta++;
      jmp_lit(&dres, Ijk_Boring, d32);
      vassert(dres.whatNext == Dis_StopHere);
      DIP("jmp-8 0x%x\n", d32);
      break;

   case 0xE9: /* Jv (jump, 16/32 offset) */
      vassert(sz == 4); /* JRS added 2004 July 11 */
      d32 = (((Addr32)guest_EIP_bbstart)+delta+sz) + getSDisp(sz,delta);
      delta += sz;
      jmp_lit(&dres, Ijk_Boring, d32);
      vassert(dres.whatNext == Dis_StopHere);
      DIP("jmp 0x%x\n", d32);
      break;

   case 0x70: /* JOb (jump overflow) */
   case 0x71: /* JNOb (jump not overflow) */
   case 0x72: /* JBb/JNAEb (jump below) */
   case 0x73: /* JNBb/JAEb (jump not below) */
   case 0x74: /* JZb/JEb (jump zero) */
   case 0x75: /* JNZb/JNEb (jump not zero) */
   case 0x76: /* JBEb/JNAb (jump below or equal) */
   case 0x77: /* JNBEb/JAb (jump not below or equal) */
   case 0x78: /* JSb (jump negative) */
   case 0x79: /* JNSb (jump not negative) */
   case 0x7A: /* JP (jump parity even) */
   case 0x7B: /* JNP/JPO (jump parity odd) */
   case 0x7C: /* JLb/JNGEb (jump less) */
   case 0x7D: /* JGEb/JNLb (jump greater or equal) */
   case 0x7E: /* JLEb/JNGb (jump less or equal) */
   case 0x7F: /* JGb/JNLEb (jump greater) */
    { const HChar* comment = "";
      jmpDelta = (Int)getSDisp8(delta);
      vassert(-128 <= jmpDelta && jmpDelta < 128);
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + jmpDelta;
      delta++;
      /* End the block at this point. */
      jcc_01( &dres, (X86Condcode)(opc - 0x70),
              (Addr32)(guest_EIP_bbstart+delta), d32);
      vassert(dres.whatNext == Dis_StopHere);
      DIP("j%s-8 0x%x %s\n", name_X86Condcode(opc - 0x70), d32, comment);
      break;
    }

   case 0xE3: /* JECXZ (for JCXZ see above) */
      if (sz != 4) goto decode_failure;
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta++;
      stmt( IRStmt_Exit(
               binop(Iop_CmpEQ32, getIReg(4,R_ECX), mkU32(0)),
               Ijk_Boring,
               IRConst_U32(d32),
               OFFB_EIP
            ));
      DIP("jecxz 0x%x\n", d32);
      break;

   case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
   case 0xE1: /* LOOPE  disp8: decrement count, jump if count != 0 && ZF==1 */
   case 0xE2: /* LOOP   disp8: decrement count, jump if count != 0 */
    { /* Again, the docs say this uses ECX/CX as a count depending on
         the address size override, not the operand one.  Since we
         don't handle address size overrides, I guess that means
         ECX. */
      IRExpr* zbit  = NULL;
      IRExpr* count = NULL;
      IRExpr* cond  = NULL;
      const HChar* xtra = NULL;

      if (sz != 4) goto decode_failure;
      d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
      delta++;
      putIReg(4, R_ECX, binop(Iop_Sub32, getIReg(4,R_ECX), mkU32(1)));

      count = getIReg(4,R_ECX);
      cond = binop(Iop_CmpNE32, count, mkU32(0));
      switch (opc) {
         case 0xE2:
            xtra = "";
            break;
         case 0xE1:
            xtra = "e";
            zbit = mk_x86g_calculate_condition( X86CondZ );
            cond = mkAnd1(cond, zbit);
            break;
         case 0xE0:
            xtra = "ne";
            zbit = mk_x86g_calculate_condition( X86CondNZ );
            cond = mkAnd1(cond, zbit);
            break;
         default:
            vassert(0);
      }
      stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32), OFFB_EIP) );

      DIP("loop%s 0x%x\n", xtra, d32);
      break;
    }
   /* ------------------------ IMUL ----------------------- */

   case 0x69: /* IMUL Iv, Ev, Gv */
      delta = dis_imul_I_E_G ( sorb, sz, delta, sz );
      break;
   case 0x6B: /* IMUL Ib, Ev, Gv */
      delta = dis_imul_I_E_G ( sorb, sz, delta, 1 );
      break;

   /* ------------------------ MOV ------------------------ */

   case 0x88: /* MOV Gb,Eb */
      delta = dis_mov_G_E(sorb, 1, delta);
      break;

   case 0x89: /* MOV Gv,Ev */
      delta = dis_mov_G_E(sorb, sz, delta);
      break;

   case 0x8A: /* MOV Eb,Gb */
      delta = dis_mov_E_G(sorb, 1, delta);
      break;

   case 0x8B: /* MOV Ev,Gv */
      delta = dis_mov_E_G(sorb, sz, delta);
      break;

   case 0x8D: /* LEA M,Gv */
      if (sz != 4)
         goto decode_failure;
      modrm = getIByte(delta);
      if (epartIsReg(modrm))
         goto decode_failure;
      /* NOTE!  this is the one place where a segment override prefix
         has no effect on the address calculation.  Therefore we pass
         zero instead of sorb here. */
      addr = disAMode ( &alen, /*sorb*/ 0, delta, dis_buf );
      delta += alen;
      putIReg(sz, gregOfRM(modrm), mkexpr(addr));
      DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
                            nameIReg(sz,gregOfRM(modrm)));
      break;

   case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */
      delta = dis_mov_Sw_Ew(sorb, sz, delta);
      break;

   case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */
      delta = dis_mov_Ew_Sw(sorb, delta);
      break;

   case 0xA0: /* MOV Ob,AL */
      sz = 1;
      /* Fall through ... */
   case 0xA1: /* MOV Ov,eAX */
      d32 = getUDisp32(delta); delta += 4;
      ty = szToITy(sz);
      addr = newTemp(Ity_I32);
      assign( addr, handleSegOverride(sorb, mkU32(d32)) );
      putIReg(sz, R_EAX, loadLE(ty, mkexpr(addr)));
      DIP("mov%c %s0x%x, %s\n", nameISize(sz), sorbTxt(sorb),
                                d32, nameIReg(sz,R_EAX));
      break;

   case 0xA2: /* MOV AL,Ob */
      sz = 1;
      /* Fall through ... */
   case 0xA3: /* MOV eAX,Ov */
      d32 = getUDisp32(delta); delta += 4;
      ty = szToITy(sz);
      addr = newTemp(Ity_I32);
      assign( addr, handleSegOverride(sorb, mkU32(d32)) );
      storeLE( mkexpr(addr), getIReg(sz,R_EAX) );
      DIP("mov%c %s, %s0x%x\n", nameISize(sz), nameIReg(sz,R_EAX),
                                sorbTxt(sorb), d32);
      break;

   case 0xB0: /* MOV imm,AL */
   case 0xB1: /* MOV imm,CL */
   case 0xB2: /* MOV imm,DL */
   case 0xB3: /* MOV imm,BL */
   case 0xB4: /* MOV imm,AH */
   case 0xB5: /* MOV imm,CH */
   case 0xB6: /* MOV imm,DH */
   case 0xB7: /* MOV imm,BH */
      d32 = getIByte(delta); delta += 1;
      putIReg(1, opc-0xB0, mkU8(d32));
      DIP("movb $0x%x,%s\n", d32, nameIReg(1,opc-0xB0));
      break;

   case 0xB8: /* MOV imm,eAX */
   case 0xB9: /* MOV imm,eCX */
   case 0xBA: /* MOV imm,eDX */
   case 0xBB: /* MOV imm,eBX */
   case 0xBC: /* MOV imm,eSP */
   case 0xBD: /* MOV imm,eBP */
   case 0xBE: /* MOV imm,eSI */
   case 0xBF: /* MOV imm,eDI */
      d32 = getUDisp(sz,delta); delta += sz;
      putIReg(sz, opc-0xB8, mkU(szToITy(sz), d32));
      DIP("mov%c $0x%x,%s\n", nameISize(sz), d32, nameIReg(sz,opc-0xB8));
      break;

   case 0xC6: /* C6 /0 = MOV Ib,Eb */
      sz = 1;
      goto maybe_do_Mov_I_E;
   case 0xC7: /* C7 /0 = MOV Iv,Ev */
      goto maybe_do_Mov_I_E;

   maybe_do_Mov_I_E:
      modrm = getIByte(delta);
      if (gregOfRM(modrm) == 0) {
         if (epartIsReg(modrm)) {
            delta++; /* mod/rm byte */
            d32 = getUDisp(sz,delta); delta += sz;
            putIReg(sz, eregOfRM(modrm), mkU(szToITy(sz), d32));
            DIP("mov%c $0x%x, %s\n", nameISize(sz), d32,
                                     nameIReg(sz,eregOfRM(modrm)));
         } else {
            addr = disAMode ( &alen, sorb, delta, dis_buf );
            delta += alen;
            d32 = getUDisp(sz,delta); delta += sz;
            storeLE(mkexpr(addr), mkU(szToITy(sz), d32));
            DIP("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
         }
         break;
      }
      goto decode_failure;
   /* ------------------------ opl imm, A ----------------- */

   case 0x04: /* ADD Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
      break;
   case 0x05: /* ADD Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Add8, True, delta, "add" );
      break;

   case 0x0C: /* OR Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
      break;
   case 0x0D: /* OR Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
      break;

   case 0x14: /* ADC Ib, AL */
      delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
      break;
   case 0x15: /* ADC Iv, eAX */
      delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
      break;

   case 0x1C: /* SBB Ib, AL */
      delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
      break;
   case 0x1D: /* SBB Iv, eAX */
      delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
      break;

   case 0x24: /* AND Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
      break;
   case 0x25: /* AND Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
      break;

   case 0x2C: /* SUB Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Sub8, True, delta, "sub" );
      break;
   case 0x2D: /* SUB Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
      break;

   case 0x34: /* XOR Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
      break;
   case 0x35: /* XOR Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
      break;

   case 0x3C: /* CMP Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
      break;
   case 0x3D: /* CMP Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
      break;

   case 0xA8: /* TEST Ib, AL */
      delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
      break;
   case 0xA9: /* TEST Iv, eAX */
      delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
      break;
   /* ------------------------ opl Ev, Gv ----------------- */

   case 0x02: /* ADD Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, 1, delta, "add" );
      break;
   case 0x03: /* ADD Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, sz, delta, "add" );
      break;

   case 0x0A: /* OR Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, 1, delta, "or" );
      break;
   case 0x0B: /* OR Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, sz, delta, "or" );
      break;

   case 0x12: /* ADC Eb,Gb */
      delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, 1, delta, "adc" );
      break;
   case 0x13: /* ADC Ev,Gv */
      delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, sz, delta, "adc" );
      break;

   case 0x1A: /* SBB Eb,Gb */
      delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, 1, delta, "sbb" );
      break;
   case 0x1B: /* SBB Ev,Gv */
      delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, sz, delta, "sbb" );
      break;

   case 0x22: /* AND Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_And8, True, 1, delta, "and" );
      break;
   case 0x23: /* AND Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_And8, True, sz, delta, "and" );
      break;

   case 0x2A: /* SUB Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, 1, delta, "sub" );
      break;
   case 0x2B: /* SUB Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, sz, delta, "sub" );
      break;

   case 0x32: /* XOR Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, 1, delta, "xor" );
      break;
   case 0x33: /* XOR Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, sz, delta, "xor" );
      break;

   case 0x3A: /* CMP Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, 1, delta, "cmp" );
      break;
   case 0x3B: /* CMP Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, sz, delta, "cmp" );
      break;

   case 0x84: /* TEST Eb,Gb */
      delta = dis_op2_E_G ( sorb, False, Iop_And8, False, 1, delta, "test" );
      break;
   case 0x85: /* TEST Ev,Gv */
      delta = dis_op2_E_G ( sorb, False, Iop_And8, False, sz, delta, "test" );
      break;
   /* ------------------------ opl Gv, Ev ----------------- */

   case 0x00: /* ADD Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Add8, True, 1, delta, "add" );
      break;
   case 0x01: /* ADD Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Add8, True, sz, delta, "add" );
      break;

   case 0x08: /* OR Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Or8, True, 1, delta, "or" );
      break;
   case 0x09: /* OR Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Or8, True, sz, delta, "or" );
      break;

   case 0x10: /* ADC Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, True,
                            Iop_Add8, True, 1, delta, "adc" );
      break;
   case 0x11: /* ADC Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, True,
                            Iop_Add8, True, sz, delta, "adc" );
      break;

   case 0x18: /* SBB Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, True,
                            Iop_Sub8, True, 1, delta, "sbb" );
      break;
   case 0x19: /* SBB Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, True,
                            Iop_Sub8, True, sz, delta, "sbb" );
      break;

   case 0x20: /* AND Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_And8, True, 1, delta, "and" );
      break;
   case 0x21: /* AND Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_And8, True, sz, delta, "and" );
      break;

   case 0x28: /* SUB Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Sub8, True, 1, delta, "sub" );
      break;
   case 0x29: /* SUB Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Sub8, True, sz, delta, "sub" );
      break;

   case 0x30: /* XOR Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Xor8, True, 1, delta, "xor" );
      break;
   case 0x31: /* XOR Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Xor8, True, sz, delta, "xor" );
      break;

   case 0x38: /* CMP Gb,Eb */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Sub8, False, 1, delta, "cmp" );
      break;
   case 0x39: /* CMP Gv,Ev */
      delta = dis_op2_G_E ( sorb, pfx_lock, False,
                            Iop_Sub8, False, sz, delta, "cmp" );
      break;
   /* ------------------------ POP ------------------------ */

   case 0x58: /* POP eAX */
   case 0x59: /* POP eCX */
   case 0x5A: /* POP eDX */
   case 0x5B: /* POP eBX */
   case 0x5D: /* POP eBP */
   case 0x5E: /* POP eSI */
   case 0x5F: /* POP eDI */
   case 0x5C: /* POP eSP */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(szToITy(sz)); t2 = newTemp(Ity_I32);
      assign(t2, getIReg(4, R_ESP));
      assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz)));
      putIReg(sz, opc-0x58, mkexpr(t1));
      DIP("pop%c %s\n", nameISize(sz), nameIReg(sz,opc-0x58));
      break;

   case 0x9D: /* POPF */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32);
      assign(t2, getIReg(4, R_ESP));
      assign(t1, widenUto32(loadLE(szToITy(sz),mkexpr(t2))));
      putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz)));

      /* Generate IR to set %EFLAGS{O,S,Z,A,C,P,D,ID,AC} from the
         value in t1. */
      set_EFLAGS_from_value( t1, True/*emit_AC_emwarn*/,
                                 ((Addr32)guest_EIP_bbstart)+delta );

      DIP("popf%c\n", nameISize(sz));
      break;
   case 0x61: /* POPA */
      /* This is almost certainly wrong for sz==2.  So ... */
      if (sz != 4) goto decode_failure;

      /* t5 is the old %ESP value. */
      t5 = newTemp(Ity_I32);
      assign( t5, getIReg(4, R_ESP) );
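      /* The constants 28,24,...,0 below index the pusha frame from
         its base: EDI was pushed last so it sits at offset 0, while
         EAX, pushed first, sits at offset 28.  Offset 12 (the saved
         ESP) is deliberately skipped. */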
      /* Reload all the registers, except %esp. */
      putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) ));
      putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) ));
      putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) ));
      putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) ));
      /* ignore saved %ESP */
      putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) ));
      putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) ));
      putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) ));

      /* and move %ESP back up */
      putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) );

      DIP("popa%c\n", nameISize(sz));
      break;
   case 0x8F: /* POPL/POPW m32 */
     { Int len;
       UChar rm = getIByte(delta);

       /* make sure this instruction is correct POP */
       if (epartIsReg(rm) || gregOfRM(rm) != 0)
          goto decode_failure;
       /* and has correct size */
       if (sz != 4 && sz != 2)
          goto decode_failure;
       ty = szToITy(sz);

       t1 = newTemp(Ity_I32); /* stack address */
       t3 = newTemp(ty); /* data */
       /* set t1 to ESP: t1 = ESP */
       assign( t1, getIReg(4, R_ESP) );
       /* load M[ESP] to virtual register t3: t3 = M[t1] */
       assign( t3, loadLE(ty, mkexpr(t1)) );

       /* increase ESP; must be done before the STORE.  Intel manual says:
            If the ESP register is used as a base register for addressing
            a destination operand in memory, the POP instruction computes
            the effective address of the operand after it increments the
            ESP register. */
       putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(sz)) );

       /* resolve MODR/M */
       addr = disAMode ( &len, sorb, delta, dis_buf);

       storeLE( mkexpr(addr), mkexpr(t3) );

       DIP("pop%c %s\n", sz==2 ? 'w' : 'l', dis_buf);

       delta += len;
       break;
     }

   case 0x1F: /* POP %DS */
      dis_pop_segreg( R_DS, sz ); break;
   case 0x07: /* POP %ES */
      dis_pop_segreg( R_ES, sz ); break;
   case 0x17: /* POP %SS */
      dis_pop_segreg( R_SS, sz ); break;
   /* ------------------------ PUSH ----------------------- */

   case 0x50: /* PUSH eAX */
   case 0x51: /* PUSH eCX */
   case 0x52: /* PUSH eDX */
   case 0x53: /* PUSH eBX */
   case 0x55: /* PUSH eBP */
   case 0x56: /* PUSH eSI */
   case 0x57: /* PUSH eDI */
   case 0x54: /* PUSH eSP */
      /* This is the Right Way, in that the value to be pushed is
         established before %esp is changed, so that pushl %esp
         correctly pushes the old value. */
      vassert(sz == 2 || sz == 4);
      ty = sz==2 ? Ity_I16 : Ity_I32;
      t1 = newTemp(ty); t2 = newTemp(Ity_I32);
      assign(t1, getIReg(sz, opc-0x50));
      assign(t2, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)));
      putIReg(4, R_ESP, mkexpr(t2) );
      storeLE(mkexpr(t2),mkexpr(t1));
      DIP("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50));
      break;

   case 0x68: /* PUSH Iv */
      d32 = getUDisp(sz,delta); delta += sz;
      goto do_push_I;
   case 0x6A: /* PUSH Ib, sign-extended to sz */
      d32 = getSDisp8(delta); delta += 1;
      goto do_push_I;
   do_push_I:
      ty = szToITy(sz);
      t1 = newTemp(Ity_I32); t2 = newTemp(ty);
      assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
      putIReg(4, R_ESP, mkexpr(t1) );
      /* stop mkU16 asserting if d32 is a negative 16-bit number */
      if (ty == Ity_I16)
         d32 &= 0xFFFF;
      storeLE( mkexpr(t1), mkU(ty,d32) );
      DIP("push%c $0x%x\n", nameISize(sz), d32);
      break;

   case 0x9C: /* PUSHF */ {
      vassert(sz == 2 || sz == 4);

      t1 = newTemp(Ity_I32);
      assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
      putIReg(4, R_ESP, mkexpr(t1) );

      /* Calculate OSZACP, and patch in fixed fields as per
         Intel docs.
         - bit 1 is always 1
         - bit 9 is Interrupt Enable (should always be 1 in user mode?)
      */
      t2 = newTemp(Ity_I32);
      assign( t2, binop(Iop_Or32,
                        mk_x86g_calculate_eflags_all(),
                        mkU32( (1<<1)|(1<<9) ) ));

      /* Patch in the D flag.  This can simply be a copy of bit 10 of
         baseBlock[OFFB_DFLAG]. */
      t3 = newTemp(Ity_I32);
      assign( t3, binop(Iop_Or32,
                        mkexpr(t2),
                        binop(Iop_And32,
                              IRExpr_Get(OFFB_DFLAG,Ity_I32),
                              mkU32(1<<10)))
            );

      /* And patch in the ID flag. */
      t4 = newTemp(Ity_I32);
      assign( t4, binop(Iop_Or32,
                        mkexpr(t3),
                        binop(Iop_And32,
                              binop(Iop_Shl32, IRExpr_Get(OFFB_IDFLAG,Ity_I32),
                                               mkU8(21)),
                              mkU32(1<<21)))
            );

      /* And patch in the AC flag. */
      t5 = newTemp(Ity_I32);
      assign( t5, binop(Iop_Or32,
                        mkexpr(t4),
                        binop(Iop_And32,
                              binop(Iop_Shl32, IRExpr_Get(OFFB_ACFLAG,Ity_I32),
                                               mkU8(18)),
                              mkU32(1<<18)))
            );

      /* if sz==2, the stored value needs to be narrowed. */
      if (sz == 2)
         storeLE( mkexpr(t1), unop(Iop_32to16,mkexpr(t5)) );
      else
         storeLE( mkexpr(t1), mkexpr(t5) );

      DIP("pushf%c\n", nameISize(sz));
      break;
   }

   case 0x60: /* PUSHA */
      /* This is almost certainly wrong for sz==2.  So ... */
      if (sz != 4) goto decode_failure;

      /* This is the Right Way, in that the value to be pushed is
         established before %esp is changed, so that pusha
         correctly pushes the old %esp value.  New value of %esp is
         pushed at start. */
      /* t0 is the %ESP value we're going to push. */
      t0 = newTemp(Ity_I32);
      assign( t0, getIReg(4, R_ESP) );

      /* t5 will be the new %ESP value. */
      t5 = newTemp(Ity_I32);
      assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) );

      /* Update guest state before prodding memory. */
      putIReg(4, R_ESP, mkexpr(t5));

      /* Dump all the registers. */
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/);
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) );
      storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) );

      DIP("pusha%c\n", nameISize(sz));
      break;

   case 0x0E: /* PUSH %CS */
      dis_push_segreg( R_CS, sz ); break;
   case 0x1E: /* PUSH %DS */
      dis_push_segreg( R_DS, sz ); break;
   case 0x06: /* PUSH %ES */
      dis_push_segreg( R_ES, sz ); break;
   case 0x16: /* PUSH %SS */
      dis_push_segreg( R_SS, sz ); break;
   /* ------------------------ SCAS et al ----------------- */

   case 0xA4: /* MOVS, no REP prefix */
   case 0xA5:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb );
      break;

   case 0xA6: /* CMPSb, no REP prefix */
   case 0xA7:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb );
      break;

   case 0xAA: /* STOS, no REP prefix */
   case 0xAB:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_STOS, ( opc == 0xAA ? 1 : sz ), "stos", sorb );
      break;

   case 0xAC: /* LODS, no REP prefix */
   case 0xAD:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", sorb );
      break;

   case 0xAE: /* SCAS, no REP prefix */
   case 0xAF:
      if (sorb != 0)
         goto decode_failure; /* else dis_string_op asserts */
      dis_string_op( dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb );
      break;

   case 0xFC: /* CLD */
      stmt( IRStmt_Put( OFFB_DFLAG, mkU32(1)) );
      DIP("cld\n");
      break;

   case 0xFD: /* STD */
      stmt( IRStmt_Put( OFFB_DFLAG, mkU32(0xFFFFFFFF)) );
      DIP("std\n");
      break;

   case 0xF8: /* CLC */
   case 0xF9: /* STC */
   case 0xF5: /* CMC */
      t0 = newTemp(Ity_I32);
      t1 = newTemp(Ity_I32);
      assign( t0, mk_x86g_calculate_eflags_all() );
      switch (opc) {
         case 0xF8:
            assign( t1, binop(Iop_And32, mkexpr(t0),
                              mkU32(~X86G_CC_MASK_C)));
            DIP("clc\n");
            break;
         case 0xF9:
            assign( t1, binop(Iop_Or32, mkexpr(t0),
                              mkU32(X86G_CC_MASK_C)));
            DIP("stc\n");
            break;
         case 0xF5:
            assign( t1, binop(Iop_Xor32, mkexpr(t0),
                              mkU32(X86G_CC_MASK_C)));
            DIP("cmc\n");
            break;
         default:
            vpanic("disInstr(x86)(clc/stc/cmc)");
      }
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) ));
      /* Set NDEP even though it isn't used.  This makes redundant-PUT
         elimination of previous stores to this field work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
      break;

   case 0xD6: /* SALC */
      t0 = newTemp(Ity_I32);
      t1 = newTemp(Ity_I32);
      assign( t0, binop(Iop_And32,
                        mk_x86g_calculate_eflags_c(),
                        mkU32(1)) );
      assign( t1, binop(Iop_Sar32,
                        binop(Iop_Shl32, mkexpr(t0), mkU8(31)),
                        mkU8(31)) );
      putIReg(1, R_EAX, unop(Iop_32to8, mkexpr(t1)) );
      DIP("salc\n");
      break;
   /* REPNE prefix insn */
   case 0xF2: {
      Addr32 eip_orig = guest_EIP_bbstart + delta_start;
      if (sorb != 0) goto decode_failure;
      abyte = getIByte(delta); delta++;

      if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }

      switch (abyte) {
      /* According to the Intel manual, "repne movs" should never occur, but
       * in practice it has happened, so allow for it here... */
      case 0xA4: sz = 1;   /* REPNE MOVS<sz>, fallthrough */
      case 0xA5:
         dis_REP_op ( &dres, X86CondNZ, dis_MOVS, sz, eip_orig,
                      guest_EIP_bbstart+delta, "repne movs" );
         break;

      case 0xA6: sz = 1;   /* REPNE CMP<sz>, fallthrough */
      case 0xA7:
         dis_REP_op ( &dres, X86CondNZ, dis_CMPS, sz, eip_orig,
                      guest_EIP_bbstart+delta, "repne cmps" );
         break;

      case 0xAA: sz = 1;   /* REPNE STOS<sz>, fallthrough */
      case 0xAB:
         dis_REP_op ( &dres, X86CondNZ, dis_STOS, sz, eip_orig,
                      guest_EIP_bbstart+delta, "repne stos" );
         break;

      case 0xAE: sz = 1;   /* REPNE SCAS<sz>, fallthrough */
      case 0xAF:
         dis_REP_op ( &dres, X86CondNZ, dis_SCAS, sz, eip_orig,
                      guest_EIP_bbstart+delta, "repne scas" );
         break;

      default:
         goto decode_failure;
      }
      break;
   }

   /* REP/REPE prefix insn (for SCAS and CMPS, 0xF3 means REPE,
      for the rest, it means REP) */
   case 0xF3: {
      Addr32 eip_orig = guest_EIP_bbstart + delta_start;
      abyte = getIByte(delta); delta++;

      if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }

      if (sorb != 0 && abyte != 0x0F) goto decode_failure;

      switch (abyte) {
      case 0x0F:
         switch (getIByte(delta)) {
         /* On older CPUs, TZCNT behaves the same as BSF. */
         case 0xBC: /* REP BSF Gv,Ev */
            delta = dis_bs_E_G ( sorb, sz, delta + 1, True );
            break;
         /* On older CPUs, LZCNT behaves the same as BSR. */
         case 0xBD: /* REP BSR Gv,Ev */
            delta = dis_bs_E_G ( sorb, sz, delta + 1, False );
            break;
         default:
            goto decode_failure;
         }
         break;

      case 0xA4: sz = 1;   /* REP MOVS<sz>, fallthrough */
      case 0xA5:
         dis_REP_op ( &dres, X86CondAlways, dis_MOVS, sz, eip_orig,
                      guest_EIP_bbstart+delta, "rep movs" );
         break;

      case 0xA6: sz = 1;   /* REPE CMP<sz>, fallthrough */
      case 0xA7:
         dis_REP_op ( &dres, X86CondZ, dis_CMPS, sz, eip_orig,
                      guest_EIP_bbstart+delta, "repe cmps" );
         break;

      case 0xAA: sz = 1;   /* REP STOS<sz>, fallthrough */
      case 0xAB:
         dis_REP_op ( &dres, X86CondAlways, dis_STOS, sz, eip_orig,
                      guest_EIP_bbstart+delta, "rep stos" );
         break;

      case 0xAC: sz = 1;   /* REP LODS<sz>, fallthrough */
      case 0xAD:
         dis_REP_op ( &dres, X86CondAlways, dis_LODS, sz, eip_orig,
                      guest_EIP_bbstart+delta, "rep lods" );
         break;

      case 0xAE: sz = 1;   /* REPE SCAS<sz>, fallthrough */
      case 0xAF:
         dis_REP_op ( &dres, X86CondZ, dis_SCAS, sz, eip_orig,
                      guest_EIP_bbstart+delta, "repe scas" );
         break;

      case 0x90: /* REP NOP (PAUSE) */
         /* a hint to the P4 re spin-wait loop */
         DIP("rep nop (P4 pause)\n");
         /* "observe" the hint.  The Vex client needs to be careful not
            to cause very long delays as a result, though. */
         jmp_lit(&dres, Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta);
         vassert(dres.whatNext == Dis_StopHere);
         break;

      case 0xC3: /* REP RET -- same as normal ret? */
         dis_ret(&dres, 0);
         DIP("rep ret\n");
         break;

      default:
         goto decode_failure;
      }
      break;
   }
   /* ------------------------ XCHG ----------------------- */

   /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
      prefix; hence it must be translated with an IRCAS (at least, the
      memory variant). */
   case 0x86: /* XCHG Gb,Eb */
      sz = 1;
      /* Fall through ... */
   case 0x87: /* XCHG Gv,Ev */
      modrm = getIByte(delta);
      ty = szToITy(sz);
      t1 = newTemp(ty); t2 = newTemp(ty);
      if (epartIsReg(modrm)) {
         assign(t1, getIReg(sz, eregOfRM(modrm)));
         assign(t2, getIReg(sz, gregOfRM(modrm)));
         putIReg(sz, gregOfRM(modrm), mkexpr(t1));
         putIReg(sz, eregOfRM(modrm), mkexpr(t2));
         delta++;
         DIP("xchg%c %s, %s\n",
             nameISize(sz), nameIReg(sz,gregOfRM(modrm)),
                            nameIReg(sz,eregOfRM(modrm)));
      } else {
         *expect_CAS = True;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         assign( t1, loadLE(ty,mkexpr(addr)) );
         assign( t2, getIReg(sz,gregOfRM(modrm)) );
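         /* casLE with expected value t1 (the value just loaded) makes
            the swap atomic: if memory no longer holds t1 at commit
            time, the generated IR restarts this instruction -- that
            is what the guest_EIP_curr_instr argument is for -- so no
            torn exchange is ever observable. */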
         casLE( mkexpr(addr),
                mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
         putIReg( sz, gregOfRM(modrm), mkexpr(t1) );
         delta += alen;
         DIP("xchg%c %s, %s\n", nameISize(sz),
                                nameIReg(sz,gregOfRM(modrm)), dis_buf);
      }
      break;

   case 0x90: /* XCHG eAX,eAX */
      DIP("nop\n");
      break;
   case 0x91: /* XCHG eAX,eCX */
   case 0x92: /* XCHG eAX,eDX */
   case 0x93: /* XCHG eAX,eBX */
   case 0x94: /* XCHG eAX,eSP */
   case 0x95: /* XCHG eAX,eBP */
   case 0x96: /* XCHG eAX,eSI */
   case 0x97: /* XCHG eAX,eDI */
      codegen_xchg_eAX_Reg ( sz, opc - 0x90 );
      break;

   /* ------------------------ XLAT ----------------------- */

   case 0xD7: /* XLAT */
      if (sz != 4) goto decode_failure; /* sz == 2 is also allowed (0x66) */
      putIReg(
         1,
         R_EAX/*AL*/,
         loadLE(Ity_I8,
                handleSegOverride(
                   sorb,
                   binop(Iop_Add32,
                         getIReg(4, R_EBX),
                         unop(Iop_8Uto32, getIReg(1, R_EAX/*AL*/))))));

      DIP("xlat%c [ebx]\n", nameISize(sz));
      break;
   /* ------------------------ IN / OUT ----------------------- */

   case 0xE4: /* IN imm8, AL */
      sz = 1;
      t1 = newTemp(Ity_I32);
      abyte = getIByte(delta); delta++;
      assign(t1, mkU32( abyte & 0xFF ));
      DIP("in%c $%d,%s\n", nameISize(sz), abyte, nameIReg(sz,R_EAX));
      goto do_IN;
   case 0xE5: /* IN imm8, eAX */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32);
      abyte = getIByte(delta); delta++;
      assign(t1, mkU32( abyte & 0xFF ));
      DIP("in%c $%d,%s\n", nameISize(sz), abyte, nameIReg(sz,R_EAX));
      goto do_IN;
   case 0xEC: /* IN %DX, AL */
      sz = 1;
      t1 = newTemp(Ity_I32);
      assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX)));
      DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX),
                          nameIReg(sz,R_EAX));
      goto do_IN;
   case 0xED: /* IN %DX, eAX */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32);
      assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX)));
      DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX),
                          nameIReg(sz,R_EAX));
      goto do_IN;

   do_IN: {
      /* At this point, sz indicates the width, and t1 is a 32-bit
         value giving port number. */
      IRDirty* d;
      vassert(sz == 1 || sz == 2 || sz == 4);
      ty = szToITy(sz);
      t2 = newTemp(Ity_I32);
      d = unsafeIRDirty_1_N(
             t2,
             0/*regparms*/,
             "x86g_dirtyhelper_IN",
             &x86g_dirtyhelper_IN,
             mkIRExprVec_2( mkexpr(t1), mkU32(sz) )
          );
      /* do the call, dumping the result in t2. */
      stmt( IRStmt_Dirty(d) );
      putIReg(sz, R_EAX, narrowTo( ty, mkexpr(t2) ) );
      break;
   }

   case 0xE6: /* OUT AL, imm8 */
      sz = 1;
      t1 = newTemp(Ity_I32);
      abyte = getIByte(delta); delta++;
      assign( t1, mkU32( abyte & 0xFF ) );
      DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), abyte);
      goto do_OUT;
   case 0xE7: /* OUT eAX, imm8 */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32);
      abyte = getIByte(delta); delta++;
      assign( t1, mkU32( abyte & 0xFF ) );
      DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), abyte);
      goto do_OUT;
   case 0xEE: /* OUT AL, %DX */
      sz = 1;
      t1 = newTemp(Ity_I32);
      assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) );
      DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX),
                           nameIReg(2,R_EDX));
      goto do_OUT;
   case 0xEF: /* OUT eAX, %DX */
      vassert(sz == 2 || sz == 4);
      t1 = newTemp(Ity_I32);
      assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) );
      DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX),
                           nameIReg(2,R_EDX));
      goto do_OUT;

   do_OUT: {
      /* At this point, sz indicates the width, and t1 is a 32-bit
         value giving port number. */
      IRDirty* d;
      vassert(sz == 1 || sz == 2 || sz == 4);
      ty = szToITy(sz);
      d = unsafeIRDirty_0_N(
             0/*regparms*/,
             "x86g_dirtyhelper_OUT",
             &x86g_dirtyhelper_OUT,
             mkIRExprVec_3( mkexpr(t1),
                            widenUto32( getIReg(sz, R_EAX) ),
                            mkU32(sz) )
          );
      stmt( IRStmt_Dirty(d) );
      break;
   }
   /* ------------------------ (Grp1 extensions) ---------- */

   case 0x82: /* Grp1 Ib,Eb too.  Apparently this is the same as
                 case 0x80, but only in 32-bit mode. */
      /* fallthru */
   case 0x80: /* Grp1 Ib,Eb */
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      sz    = 1;
      d_sz  = 1;
      d32   = getUChar(delta + am_sz);
      delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
      break;

   case 0x81: /* Grp1 Iv,Ev */
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = sz;
      d32   = getUDisp(d_sz, delta + am_sz);
      delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
      break;

   case 0x83: /* Grp1 Ib,Ev */
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 1;
      d32   = getSDisp8(delta + am_sz);
      delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
      break;

   /* ------------------------ (Grp2 extensions) ---------- */

   case 0xC0: { /* Grp2 Ib,Eb */
      Bool decode_OK = True;
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 1;
      d32   = getUChar(delta + am_sz);
      sz    = 1;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d32 & 0xFF), NULL, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xC1: { /* Grp2 Ib,Ev */
      Bool decode_OK = True;
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 1;
      d32   = getUChar(delta + am_sz);
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d32 & 0xFF), NULL, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xD0: { /* Grp2 1,Eb */
      Bool decode_OK = True;
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 0;
      d32   = 1;
      sz    = 1;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d32), NULL, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xD1: { /* Grp2 1,Ev */
      Bool decode_OK = True;
      modrm = getUChar(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 0;
      d32   = 1;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d32), NULL, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xD2: { /* Grp2 CL,Eb */
      Bool decode_OK = True;
      modrm = getUChar(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 0;
      sz    = 1;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         getIReg(1,R_ECX), "%cl", &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xD3: { /* Grp2 CL,Ev */
      Bool decode_OK = True;
      modrm = getIByte(delta);
      am_sz = lengthAMode(delta);
      d_sz  = 0;
      delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
                         getIReg(1,R_ECX), "%cl", &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }

   /* ------------------------ (Grp3 extensions) ---------- */

   case 0xF6: { /* Grp3 Eb */
      Bool decode_OK = True;
      delta = dis_Grp3 ( sorb, pfx_lock, 1, delta, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   case 0xF7: { /* Grp3 Ev */
      Bool decode_OK = True;
      delta = dis_Grp3 ( sorb, pfx_lock, sz, delta, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }

   /* ------------------------ (Grp4 extensions) ---------- */

   case 0xFE: { /* Grp4 Eb */
      Bool decode_OK = True;
      delta = dis_Grp4 ( sorb, pfx_lock, delta, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }

   /* ------------------------ (Grp5 extensions) ---------- */

   case 0xFF: { /* Grp5 Ev */
      Bool decode_OK = True;
      delta = dis_Grp5 ( sorb, pfx_lock, sz, delta, &dres, &decode_OK );
      if (!decode_OK)
         goto decode_failure;
      break;
   }
   /* ------------------------ Escapes to 2-byte opcodes -- */

   case 0x0F: {
      opc = getIByte(delta); delta++;

      switch (opc) {

      /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */

      case 0xBA: { /* Grp8 Ib,Ev */
         Bool decode_OK = False;
         modrm = getUChar(delta);
         am_sz = lengthAMode(delta);
         d32   = getSDisp8(delta + am_sz);
         delta = dis_Grp8_Imm ( sorb, pfx_lock, delta, modrm,
                                am_sz, sz, d32, &decode_OK );
         if (!decode_OK)
            goto decode_failure;
         break;
      }

      /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */

      case 0xBC: /* BSF Gv,Ev */
         delta = dis_bs_E_G ( sorb, sz, delta, True );
         break;
      case 0xBD: /* BSR Gv,Ev */
         delta = dis_bs_E_G ( sorb, sz, delta, False );
         break;

      /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */

      case 0xC8: /* BSWAP %eax */
      case 0xC9:
      case 0xCA:
      case 0xCB:
      case 0xCC:
      case 0xCD:
      case 0xCE:
      case 0xCF: /* BSWAP %edi */
         /* According to the Intel and AMD docs, 16-bit BSWAP is undefined.
          * However, the result of a 16-bit BSWAP is always zero in every Intel
          * and AMD CPU, and some software depends on this behavior. */
         if (sz == 2) {
            putIReg(2, opc-0xC8, mkU16(0));
            DIP("bswapw %s\n", nameIReg(2, opc-0xC8));
         } else if (sz == 4) {
            t1 = newTemp(Ity_I32);
            assign( t1, getIReg(4, opc-0xC8) );
            t2 = math_BSWAP(t1, Ity_I32);
            putIReg(4, opc-0xC8, mkexpr(t2));
            DIP("bswapl %s\n", nameIReg(4, opc-0xC8));
         } else {
            goto decode_failure;
         }
         break;

      /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */

      case 0xA3: /* BT Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpNone );
         break;
      case 0xB3: /* BTR Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpReset );
         break;
      case 0xAB: /* BTS Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpSet );
         break;
      case 0xBB: /* BTC Gv,Ev */
         delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpComp );
         break;

      /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */

      case 0x40: /* CMOVOb (cmov overflow) */
      case 0x41: /* CMOVNOb (cmov not overflow) */
      case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
      case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
      case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
      case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
      case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
      case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
      case 0x48: /* CMOVSb (cmov negative) */
      case 0x49: /* CMOVNSb (cmov not negative) */
      case 0x4A: /* CMOVP (cmov parity even) */
      case 0x4B: /* CMOVNP (cmov parity odd) */
      case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
      case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
      case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
      case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
         delta = dis_cmov_E_G(sorb, sz, (X86Condcode)(opc - 0x40), delta);
         break;

      /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */

      case 0xB0: /* CMPXCHG Gb,Eb */
         delta = dis_cmpxchg_G_E ( sorb, pfx_lock, 1, delta );
         break;
      case 0xB1: /* CMPXCHG Gv,Ev */
         delta = dis_cmpxchg_G_E ( sorb, pfx_lock, sz, delta );
         break;
      case 0xC7: { /* CMPXCHG8B Gv (0F C7 /1) */
         IRTemp expdHi    = newTemp(Ity_I32);
         IRTemp expdLo    = newTemp(Ity_I32);
         IRTemp dataHi    = newTemp(Ity_I32);
         IRTemp dataLo    = newTemp(Ity_I32);
         IRTemp oldHi     = newTemp(Ity_I32);
         IRTemp oldLo     = newTemp(Ity_I32);
         IRTemp flags_old = newTemp(Ity_I32);
         IRTemp flags_new = newTemp(Ity_I32);
         IRTemp success   = newTemp(Ity_I1);

         /* Translate this using a DCAS, even if there is no LOCK
            prefix.  Life is too short to bother with generating two
            different translations for the with/without-LOCK-prefix
            cases. */
         *expect_CAS = True;

         /* Decode, and generate address. */
         if (sz != 4) goto decode_failure;
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) goto decode_failure;
         if (gregOfRM(modrm) != 1) goto decode_failure;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;

         /* Get the expected and new values. */
         assign( expdHi, getIReg(4,R_EDX) );
         assign( expdLo, getIReg(4,R_EAX) );
         assign( dataHi, getIReg(4,R_ECX) );
         assign( dataLo, getIReg(4,R_EBX) );

         /* Do the DCAS */
         stmt( IRStmt_CAS(
                  mkIRCAS( oldHi, oldLo,
                           Iend_LE, mkexpr(addr),
                           mkexpr(expdHi), mkexpr(expdLo),
                           mkexpr(dataHi), mkexpr(dataLo)
               )));

         /* success when oldHi:oldLo == expdHi:expdLo */
         assign( success,
                 binop(Iop_CasCmpEQ32,
                       binop(Iop_Or32,
                             binop(Iop_Xor32, mkexpr(oldHi), mkexpr(expdHi)),
                             binop(Iop_Xor32, mkexpr(oldLo), mkexpr(expdLo))
                       ),
                       mkU32(0)
                 ));
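         /* Put differently (a restatement, not extra IR): success is
            ((oldHi ^ expdHi) | (oldLo ^ expdLo)) == 0, a branch-free
            64-bit equality test built from 32-bit operations. */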
         /* If the DCAS is successful, that is to say oldHi:oldLo ==
            expdHi:expdLo, then put expdHi:expdLo back in EDX:EAX,
            which is where they came from originally.  Both the actual
            contents of these two regs, and any shadow values, are
            unchanged.  If the DCAS fails then we're putting into
            EDX:EAX the value seen in memory. */
         putIReg(4, R_EDX,
                    IRExpr_ITE( mkexpr(success),
                                mkexpr(expdHi), mkexpr(oldHi)
                 ));
         putIReg(4, R_EAX,
                    IRExpr_ITE( mkexpr(success),
                                mkexpr(expdLo), mkexpr(oldLo)
                 ));

         /* Copy the success bit into the Z flag and leave the others
            unchanged */
         assign( flags_old, widenUto32(mk_x86g_calculate_eflags_all()));
         assign(
            flags_new,
            binop(Iop_Or32,
                  binop(Iop_And32, mkexpr(flags_old),
                                   mkU32(~X86G_CC_MASK_Z)),
                  binop(Iop_Shl32,
                        binop(Iop_And32,
                              unop(Iop_1Uto32, mkexpr(success)), mkU32(1)),
                        mkU8(X86G_CC_SHIFT_Z)) ));
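         /* Spelled out, the above computes
               flags_new = (flags_old & ~X86G_CC_MASK_Z)
                           | ((success ? 1 : 0) << X86G_CC_SHIFT_Z)
            so Z reflects the DCAS outcome and every other flag bit is
            carried over from flags_old unchanged. */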
         stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(X86G_CC_OP_COPY) ));
         stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
         /* Set NDEP even though it isn't used.  This makes
            redundant-PUT elimination of previous stores to this field
            work better. */
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));

         /* Sheesh.  Aren't you glad it was me and not you that had to
            write and validate all this grunge? */

         DIP("cmpxchg8b %s\n", dis_buf);
         break;
      }
      /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */

      case 0xA2: { /* CPUID */
         /* Uses dirty helper:
               void dirtyhelper_CPUID_sse[012] ( VexGuestX86State* )
            declared to mod eax and ecx, wr ebx and edx
         */
         IRDirty* d     = NULL;
         void*    fAddr = NULL;
         const HChar* fName = NULL;
         if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3) {
            fName = "x86g_dirtyhelper_CPUID_sse3";
            fAddr = &x86g_dirtyhelper_CPUID_sse3;
         }
         else
         if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2) {
            fName = "x86g_dirtyhelper_CPUID_sse2";
            fAddr = &x86g_dirtyhelper_CPUID_sse2;
         }
         else
         if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE1) {
            fName = "x86g_dirtyhelper_CPUID_sse1";
            fAddr = &x86g_dirtyhelper_CPUID_sse1;
         }
         else
         if (archinfo->hwcaps & VEX_HWCAPS_X86_MMXEXT) {
            fName = "x86g_dirtyhelper_CPUID_mmxext";
            fAddr = &x86g_dirtyhelper_CPUID_mmxext;
         }
         else
         if (archinfo->hwcaps == 0/*no SSE*/) {
            fName = "x86g_dirtyhelper_CPUID_sse0";
            fAddr = &x86g_dirtyhelper_CPUID_sse0;
         } else
            vpanic("disInstr(x86)(cpuid)");

         vassert(fName); vassert(fAddr);
         d = unsafeIRDirty_0_N ( 0/*regparms*/,
                                 fName, fAddr, mkIRExprVec_1(IRExpr_GSPTR()) );
         /* declare guest state effects */
         d->nFxState = 4;
         vex_bzero(&d->fxState, sizeof(d->fxState));
         d->fxState[0].fx     = Ifx_Modify;
         d->fxState[0].offset = OFFB_EAX;
         d->fxState[0].size   = 4;
         d->fxState[1].fx     = Ifx_Write;
         d->fxState[1].offset = OFFB_EBX;
         d->fxState[1].size   = 4;
         d->fxState[2].fx     = Ifx_Modify;
         d->fxState[2].offset = OFFB_ECX;
         d->fxState[2].size   = 4;
         d->fxState[3].fx     = Ifx_Write;
         d->fxState[3].offset = OFFB_EDX;
         d->fxState[3].size   = 4;
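         /* On the Modify/Write split: EAX (the leaf selector) and ECX
            (the subleaf selector, for leaves that have one) are inputs
            to CPUID as well as outputs, hence Ifx_Modify; EBX and EDX
            are outputs only, hence Ifx_Write. */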
         /* execute the dirty call, side-effecting guest state */
         stmt( IRStmt_Dirty(d) );
         /* CPUID is a serialising insn.  So, just in case someone is
            using it as a memory fence ... */
         stmt( IRStmt_MBE(Imbe_Fence) );
         DIP("cpuid\n");
         break;
      }
//--       if (!VG_(cpu_has_feature)(VG_X86_FEAT_CPUID))
//--          goto decode_failure;
//-- 
//--       t1 = newTemp(cb);
//--       t2 = newTemp(cb);
//--       t3 = newTemp(cb);
//--       t4 = newTemp(cb);
//--       uInstr0(cb, CALLM_S, 0);
//-- 
//--       uInstr2(cb, GET,   4, ArchReg, R_EAX, TempReg, t1);
//--       uInstr1(cb, PUSH,  4, TempReg, t1);
//-- 
//--       uInstr2(cb, MOV,   4, Literal, 0, TempReg, t2);
//--       uLiteral(cb, 0);
//--       uInstr1(cb, PUSH,  4, TempReg, t2);
//-- 
//--       uInstr2(cb, MOV,   4, Literal, 0, TempReg, t3);
//--       uLiteral(cb, 0);
//--       uInstr1(cb, PUSH,  4, TempReg, t3);
//-- 
//--       uInstr2(cb, MOV,   4, Literal, 0, TempReg, t4);
//--       uLiteral(cb, 0);
//--       uInstr1(cb, PUSH,  4, TempReg, t4);
//-- 
//--       uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CPUID));
//--       uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty);
//-- 
//--       uInstr1(cb, POP,   4, TempReg, t4);
//--       uInstr2(cb, PUT,   4, TempReg, t4, ArchReg, R_EDX);
//-- 
//--       uInstr1(cb, POP,   4, TempReg, t3);
//--       uInstr2(cb, PUT,   4, TempReg, t3, ArchReg, R_ECX);
//-- 
//--       uInstr1(cb, POP,   4, TempReg, t2);
//--       uInstr2(cb, PUT,   4, TempReg, t2, ArchReg, R_EBX);
//-- 
//--       uInstr1(cb, POP,   4, TempReg, t1);
//--       uInstr2(cb, PUT,   4, TempReg, t1, ArchReg, R_EAX);
//-- 
//--       uInstr0(cb, CALLM_E, 0);
//--       DIP("cpuid\n");
      /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */

      case 0xB6: /* MOVZXb Eb,Gv */
         if (sz != 2 && sz != 4)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 1, sz, False );
         break;
      case 0xB7: /* MOVZXw Ew,Gv */
         if (sz != 4)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 2, 4, False );
         break;
      case 0xBE: /* MOVSXb Eb,Gv */
         if (sz != 2 && sz != 4)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 1, sz, True );
         break;
      case 0xBF: /* MOVSXw Ew,Gv */
         if (sz != 4 && /* accept movsww, sigh, see #250799 */ sz != 2)
            goto decode_failure;
         delta = dis_movx_E_G ( sorb, delta, 2, sz, True );
         break;
//--    /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */
//-- 
//--    case 0xC3: /* MOVNTI Gv,Ev */
//--       vg_assert(sz == 4);
//--       modrm = getUChar(eip);
//--       vg_assert(!epartIsReg(modrm));
//--       t1 = newTemp(cb);
//--       uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1);
//--       pair = disAMode ( cb, sorb, eip, dis_buf );
//--       t2 = LOW24(pair);
//--       eip += HI8(pair);
//--       uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
//--       DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf);
      /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */

      case 0xAF: /* IMUL Ev, Gv */
         delta = dis_mul_E_G ( sorb, sz, delta );
         break;
      /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */

      case 0x1F:
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) goto decode_failure;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         DIP("nop%c %s\n", nameISize(sz), dis_buf);
         break;
      /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
      case 0x80: /* JOb (jump overflow) */
      case 0x81: /* JNOb (jump no overflow) */
      case 0x82: /* JBb/JNAEb (jump below) */
      case 0x83: /* JNBb/JAEb (jump not below) */
      case 0x84: /* JZb/JEb (jump zero) */
      case 0x85: /* JNZb/JNEb (jump not zero) */
      case 0x86: /* JBEb/JNAb (jump below or equal) */
      case 0x87: /* JNBEb/JAb (jump not below or equal) */
      case 0x88: /* JSb (jump negative) */
      case 0x89: /* JNSb (jump not negative) */
      case 0x8A: /* JP (jump parity even) */
      case 0x8B: /* JNP/JPO (jump parity odd) */
      case 0x8C: /* JLb/JNGEb (jump less) */
      case 0x8D: /* JGEb/JNLb (jump greater or equal) */
      case 0x8E: /* JLEb/JNGb (jump less or equal) */
      case 0x8F: /* JGb/JNLEb (jump greater) */
       { Int jmpDelta;
         const HChar* comment = "";
         jmpDelta = (Int)getUDisp32(delta);
         d32 = (((Addr32)guest_EIP_bbstart)+delta+4) + jmpDelta;
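         /* That is: delta points at the 4-byte displacement, so
            bbstart+delta+4 is the address of the next insn, and the
            target is (address of next insn) + jmpDelta.  A jmpDelta of
            zero, for instance, just falls through. */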
         delta += 4;
         /* End the block at this point. */
         jcc_01( &dres, (X86Condcode)(opc - 0x80),
                 (Addr32)(guest_EIP_bbstart+delta), d32 );
         vassert(dres.whatNext == Dis_StopHere);
         DIP("j%s-32 0x%x %s\n", name_X86Condcode(opc - 0x80), d32, comment);
         break;
       }
      /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
      case 0x31: { /* RDTSC */
         IRTemp   val  = newTemp(Ity_I64);
         IRExpr** args = mkIRExprVec_0();
         IRDirty* d    = unsafeIRDirty_1_N (
                            val,
                            0/*regparms*/,
                            "x86g_dirtyhelper_RDTSC",
                            &x86g_dirtyhelper_RDTSC,
                            args
                         );
         /* execute the dirty call, dumping the result in val. */
         stmt( IRStmt_Dirty(d) );
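         /* The helper hands back the full 64-bit counter in 'val';
            split it across EDX:EAX as the architecture requires.
            Illustratively, val = 0x0000012345678ABCULL would give
            EDX = 0x00000123 and EAX = 0x45678ABC. */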
         putIReg(4, R_EDX, unop(Iop_64HIto32, mkexpr(val)));
         putIReg(4, R_EAX, unop(Iop_64to32, mkexpr(val)));
         DIP("rdtsc\n");
         break;
      }
      /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */

      case 0xA1: /* POP %FS */
         dis_pop_segreg( R_FS, sz ); break;
      case 0xA9: /* POP %GS */
         dis_pop_segreg( R_GS, sz ); break;

      case 0xA0: /* PUSH %FS */
         dis_push_segreg( R_FS, sz ); break;
      case 0xA8: /* PUSH %GS */
         dis_push_segreg( R_GS, sz ); break;
      /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
      case 0x90: /* set-O (set if overflow) */
      case 0x91: /* set-NO (set if no overflow) */
      case 0x92: /* set-Bb/set-NAEb (set if below) */
      case 0x93: /* set-NBb/set-AEb (set if not below) */
      case 0x94: /* set-Zb/set-Eb (set if zero) */
      case 0x95: /* set-NZb/set-NEb (set if not zero) */
      case 0x96: /* set-BEb/set-NAb (set if below or equal) */
      case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
      case 0x98: /* set-Sb (set if negative) */
      case 0x99: /* set-NSb (set if not negative) */
      case 0x9A: /* set-P (set if parity even) */
      case 0x9B: /* set-NP (set if parity odd) */
      case 0x9C: /* set-Lb/set-NGEb (set if less) */
      case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
      case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
      case 0x9F: /* set-Gb/set-NLEb (set if greater) */
         t1 = newTemp(Ity_I8);
         assign( t1, unop(Iop_1Uto8,mk_x86g_calculate_condition(opc-0x90)) );
         modrm = getIByte(delta);
         if (epartIsReg(modrm)) {
            delta++;
            putIReg(1, eregOfRM(modrm), mkexpr(t1));
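            /* Note that only the low byte of the destination register
               is written; bits 8..31 are left untouched, as SETcc
               requires. */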
            DIP("set%s %s\n", name_X86Condcode(opc-0x90),
                              nameIReg(1,eregOfRM(modrm)));
         } else {
            addr = disAMode ( &alen, sorb, delta, dis_buf );
            delta += alen;
            storeLE( mkexpr(addr), mkexpr(t1) );
            DIP("set%s %s\n", name_X86Condcode(opc-0x90), dis_buf);
         }
         break;
      /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */
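      /* As a reminder of the semantics built by dis_SHLRD_Gv_Ev: for a
         shift amount amt in 1 .. sz*8-1,
            SHLD: Ev = (Ev << amt) | (Gv >> (sz*8 - amt))
            SHRD: Ev = (Ev >> amt) | (Gv << (sz*8 - amt))
         i.e. the bits vacated in Ev are filled in from Gv. */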
      case 0xA4: /* SHLDv imm8,Gv,Ev */
         modrm = getIByte(delta);
         d32   = delta + lengthAMode(delta);
         vex_sprintf(dis_buf, "$%d", getIByte(d32));
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    mkU8(getIByte(d32)), True, /* literal */
                    dis_buf, True );
         break;
      case 0xA5: /* SHLDv %cl,Gv,Ev */
         modrm = getIByte(delta);
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    getIReg(1,R_ECX), False, /* not literal */
                    "%cl", True );
         break;

      case 0xAC: /* SHRDv imm8,Gv,Ev */
         modrm = getIByte(delta);
         d32   = delta + lengthAMode(delta);
         vex_sprintf(dis_buf, "$%d", getIByte(d32));
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    mkU8(getIByte(d32)), True, /* literal */
                    dis_buf, False );
         break;
      case 0xAD: /* SHRDv %cl,Gv,Ev */
         modrm = getIByte(delta);
         delta = dis_SHLRD_Gv_Ev (
                    sorb, delta, modrm, sz,
                    getIReg(1,R_ECX), False, /* not literal */
                    "%cl", False );
         break;
      /* =-=-=-=-=-=-=-=-=- SYSENTER -=-=-=-=-=-=-=-=-=-= */

      case 0x34:
         /* Simple implementation needing a long explanation.

            sysenter is a kind of syscall entry.  The key thing here
            is that the return address is not known -- that is
            something that is beyond Vex's knowledge.  So this IR
            forces a return to the scheduler, which can do what it
            likes to simulate the sysenter, but it MUST set this
            thread's guest_EIP field with the continuation address
            before resuming execution.  If that doesn't happen, the
            thread will jump to address zero, which is probably
            fatal.
         */

         /* Note where we are, so we can back up the guest to this
            point if the syscall needs to be restarted. */
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(&dres, Ijk_Sys_sysenter, 0/*bogus next EIP value*/);
         vassert(dres.whatNext == Dis_StopHere);
         DIP("sysenter\n");
         break;
      /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */

      case 0xC0: { /* XADD Gb,Eb */
         Bool decodeOK;
         delta = dis_xadd_G_E ( sorb, pfx_lock, 1, delta, &decodeOK );
         if (!decodeOK) goto decode_failure;
         break;
      }
      case 0xC1: { /* XADD Gv,Ev */
         Bool decodeOK;
         delta = dis_xadd_G_E ( sorb, pfx_lock, sz, delta, &decodeOK );
         if (!decodeOK) goto decode_failure;
         break;
      }
      /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */

      case 0x71:
      case 0x72:
      case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */

      case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
      case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
      case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
      case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xFC:
      case 0xFD:
      case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xEC:
      case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xDC:
      case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF8:
      case 0xF9:
      case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xE8:
      case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xD8:
      case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x74:
      case 0x75:
      case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x64:
      case 0x65:
      case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x68:
      case 0x69:
      case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x60:
      case 0x61:
      case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xF2:
      case 0xF3:

      case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD2:
      case 0xD3:

      case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xE2:
      {
         Int  delta0    = delta-1;
         Bool decode_OK = False;

         /* If sz==2 this is SSE, and we assume sse idec has
            already spotted those cases by now. */
         if (sz != 4)
            goto decode_failure;

         delta = dis_MMX ( &decode_OK, sorb, sz, delta-1 );
         if (!decode_OK) {
            delta = delta0;
            goto decode_failure;
         }
         break;
      }

      case 0x0E: /* FEMMS */
      case 0x77: /* EMMS */
         if (sz != 4)
            goto decode_failure;
         do_EMMS_preamble();
         DIP("{f}emms\n");
         break;
      /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
      case 0x01: /* 0F 01 /0 -- SGDT */
                 /* 0F 01 /1 -- SIDT */
      {
         /* This is really revolting, but ... since each processor
            (core) only has one IDT and one GDT, just let the guest
            see it (pass-through semantics).  I can't see any way to
            construct a faked-up value, so don't bother to try. */
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) goto decode_failure;
         if (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)
            goto decode_failure;
         addr = disAMode ( &alen, sorb, delta, dis_buf );
         delta += alen;
         switch (gregOfRM(modrm)) {
            case 0: DIP("sgdt %s\n", dis_buf); break;
            case 1: DIP("sidt %s\n", dis_buf); break;
            default: vassert(0); /*NOTREACHED*/
         }

         IRDirty* d = unsafeIRDirty_0_N (
                          0/*regparms*/,
                          "x86g_dirtyhelper_SxDT",
                          &x86g_dirtyhelper_SxDT,
                          mkIRExprVec_2( mkexpr(addr),
                                         mkU32(gregOfRM(modrm)) )
                      );
         /* declare we're writing memory */
         d->mFx   = Ifx_Write;
         d->mAddr = mkexpr(addr);
         d->mSize = 6;
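         /* The 6 bytes written are the pseudo-descriptor that SGDT and
            SIDT store with a 32-bit operand: a 16-bit limit followed
            by a 32-bit linear base address. */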
         stmt( IRStmt_Dirty(d) );
         break;
      }
      case 0x05: /* AMD's syscall */
         stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
                           mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(&dres, Ijk_Sys_syscall, ((Addr32)guest_EIP_bbstart)+delta);
         vassert(dres.whatNext == Dis_StopHere);
         DIP("syscall\n");
         break;
      /* =-=-=-=-=-=-=-=-=-=- UD2 =-=-=-=-=-=-=-=-=-=-=-= */

      case 0x0B: /* UD2 */
         stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
         jmp_lit(&dres, Ijk_NoDecode, guest_EIP_curr_instr);
         vassert(dres.whatNext == Dis_StopHere);
         DIP("ud2\n");
         break;

      /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */

      default:
         goto decode_failure;
      } /* switch (opc) for the 2-byte opcodes */
      goto decode_success;
   } /* case 0x0F: of primary opcode */
   /* ------------------------ ??? ------------------------ */

  decode_failure:
   /* All decode failures end up here. */
   if (sigill_diag) {
      vex_printf("vex x86->IR: unhandled instruction bytes: "
                 "0x%x 0x%x 0x%x 0x%x\n",
                 getIByte(delta_start+0),
                 getIByte(delta_start+1),
                 getIByte(delta_start+2),
                 getIByte(delta_start+3));
   }

   /* Tell the dispatcher that this insn cannot be decoded, and so has
      not been executed, and (is currently) the next to be executed.
      EIP should be up-to-date since it is made so at the start of each
      insn, but nevertheless be paranoid and update it again right
      now. */
   stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
   jmp_lit(&dres, Ijk_NoDecode, guest_EIP_curr_instr);
   vassert(dres.whatNext == Dis_StopHere);
   dres.len = 0;
   /* We also need to say that a CAS is not expected now, regardless
      of what it might have been set to at the start of the function,
      since the IR that we've emitted just above (to synthesise a
      SIGILL) does not involve any CAS, and presumably no other IR has
      been emitted for this (non-decoded) insn. */
   *expect_CAS = False;
   return dres;
   } /* switch (opc) for the main (primary) opcode switch. */

  decode_success:
   /* All decode successes end up here. */
   switch (dres.whatNext) {
      case Dis_Continue:
         stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_bbstart + delta) ) );
         break;
      case Dis_StopHere:
         break;
      default:
         vassert(0);
   }

   DIP("\n");
   dres.len = delta - delta_start;
   return dres;
}
/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */

DisResult disInstr_X86 ( IRSB*        irsb_IN,
                         const UChar* guest_code_IN,
                         Long         delta,
                         Addr         guest_IP,
                         VexArch      guest_arch,
                         const VexArchInfo* archinfo,
                         const VexAbiInfo*  abiinfo,
                         VexEndness   host_endness_IN,
                         Bool         sigill_diag_IN )
{
   Int       i, x1, x2;
   Bool      expect_CAS, has_CAS;
   DisResult dres;

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchX86);
   guest_code           = guest_code_IN;
   irsb                 = irsb_IN;
   host_endness         = host_endness_IN;
   guest_EIP_curr_instr = (Addr32)guest_IP;
   guest_EIP_bbstart    = (Addr32)toUInt(guest_IP - delta);

   x1 = irsb_IN->stmts_used;
   expect_CAS = False;
   dres = disInstr_X86_WRK ( &expect_CAS,
                             delta, archinfo, abiinfo, sigill_diag_IN );
   x2 = irsb_IN->stmts_used;
   vassert(x2 >= x1);
   /* See comment at the top of disInstr_X86_WRK for meaning of
      expect_CAS.  Here, we (sanity-)check for the presence/absence of
      IRCAS as directed by the returned expect_CAS value. */
   has_CAS = False;
   for (i = x1; i < x2; i++) {
      if (irsb_IN->stmts[i]->tag == Ist_CAS)
         has_CAS = True;
   }

   if (expect_CAS != has_CAS) {
      /* Inconsistency detected.  Re-disassemble the instruction so as
         to generate a useful error message; then assert. */
      vex_traceflags |= VEX_TRACE_FE;
      dres = disInstr_X86_WRK ( &expect_CAS,
                                delta, archinfo, abiinfo, sigill_diag_IN );
      for (i = x1; i < x2; i++) {
         vex_printf("\t\t");
         ppIRStmt(irsb_IN->stmts[i]);
         vex_printf("\n");
      }
      /* Failure of this assertion is serious and denotes a bug in
         the disassembler. */
      vpanic("disInstr_X86: inconsistency in LOCK prefix handling");
   }

   return dres;
}
/*--------------------------------------------------------------------*/
/*--- end                                         guest_x86_toIR.c ---*/
/*--------------------------------------------------------------------*/