/*--------------------------------------------------------------------*/
/*--- begin                                     guest_amd64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
/* Translates AMD64 code to IR. */

/* Known limitations / TODO:

   All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
   to ensure a 64-bit value is being written.

   * all arithmetic is done at 64 bits

   * no FP exceptions, except for handling stack over/underflow

   * FP rounding mode is observed only for float->int conversions and
     int->float conversions which could lose accuracy, and for
     float-to-float rounding.  For all other operations,
     round-to-nearest is used, regardless.

   * some of the FCOM cases could do with testing -- not convinced
     that the args are the right way round.

   * FSAVE does not re-initialise the FPU; it should do so.

   * FINIT not only initialises the FPU environment, it also zeroes
     all the FP registers.  It should leave the registers unchanged.

   SAHF should cause eflags[1] == 1, and in fact it produces 0.  As
   per Intel docs this bit has no meaning anyway.  Since PUSHF is the
   only way to observe eflags[1], a proper fix would be to make that
   bit be set by PUSHF.

   This module uses global variables and so is not MT-safe (if that
   should ever become relevant).
*/
/* Notes re address size overrides (0x67).

   According to the AMD documentation (24594 Rev 3.09, Sept 2003,
   "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
   and System Instructions"), Section 1.2.3 ("Address-Size Override
   Prefix"):

   0x67 applies to all explicit memory references, causing the top
   32 bits of the effective address to become zero.

   0x67 has no effect on stack references (push/pop); these always
   use a 64-bit address.

   0x67 changes the interpretation of instructions which implicitly
   reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
   instead.
*/
/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
   48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
   $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
   Following that, one of the following 4 is allowed (standard
   interpretation in parentheses):

      4887DB (xchgq %rbx,%rbx)   %RDX = client_request ( %RAX )
      4887C9 (xchgq %rcx,%rcx)   %RAX = guest_NRADDR
      4887D2 (xchgq %rdx,%rdx)   call-noredir *%RAX
      4887F6 (xchgq %rsi,%rsi)   IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.

   No prefixes may precede a "Special" instruction.
*/
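/* Illustrative sketch only (not part of the decoder): one way to
   recognise the 16-byte preamble, given a pointer to the candidate
   bytes.  The decoder further down performs an equivalent
   byte-by-byte comparison inline. */
#if 0
static Bool looksLikeSpecialPreamble ( const UChar* code )
{
   static const UChar preamble[16] = {
      0x48,0xC1,0xC7,0x03, 0x48,0xC1,0xC7,0x0D,
      0x48,0xC1,0xC7,0x3D, 0x48,0xC1,0xC7,0x33
   };
   Int i;
   for (i = 0; i < 16; i++)
      if (code[i] != preamble[i]) return False;
   return True;
}
#endif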
/* casLE (implementation of lock-prefixed insns) and rep-prefixed
   insns: the side-exit back to the start of the insn is done with
   Ijk_Boring.  This is quite wrong, it should be done with
   Ijk_NoRedir, since otherwise the side exit, which is intended to
   restart the instruction for whatever reason, could go somewhere
   entirely else.  Doing it right (with Ijk_NoRedir jumps) would make
   no-redir jumps performance critical, at least for rep-prefixed
   instructions, since all iterations thereof would involve such a
   jump.  It's not such a big deal with casLE since the side exit is
   only taken if the CAS fails, that is, the location is contended,
   which is relatively unlikely.

   Note also, the test for CAS success vs failure is done using
   Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
   Iop_Cmp{EQ,NE} equivalents.  This is so as to tell Memcheck that it
   shouldn't definedness-check these comparisons.  See
   COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
   background/rationale.
*/
/* LOCK prefixed instructions.  These are translated using IR-level
   CAS statements (IRCAS) and are believed to preserve atomicity, even
   from the point of view of some other process racing against a
   simulated one (presumably they communicate via a shared memory
   segment).

   Handlers which are aware of LOCK prefixes are:
      dis_op2_G_E      (add, or, adc, sbb, and, sub, xor)
      dis_cmpxchg_G_E  (cmpxchg)
      dis_Grp1         (add, or, adc, sbb, and, sub, xor)
      dis_Grp8_Imm     (bts, btc, btr)
      dis_bt_G_E       (bts, btc, btr)
*/
#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex_guest_amd64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_generic_x87.h"
#include "guest_amd64_defs.h"
/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an insn, right
   down in disInstr_AMD64, so that we don't have to pass them around
   endlessly.  They are all constant during the translation of any
   given insn. */

/* These are set at the start of the translation of a BB, so
   that we don't have to pass them around endlessly. */

/* We need to know this to do sub-register accesses correctly. */
static VexEndness host_endness;

/* Pointer to the guest code area (points to start of BB, not to the
   insn being processed). */
static const UChar* guest_code;

/* The guest address corresponding to guest_code[0]. */
static Addr64 guest_RIP_bbstart;

/* The guest address for the instruction currently being
   translated. */
static Addr64 guest_RIP_curr_instr;

/* The IRSB* into which we're generating code. */
static IRSB* irsb;

/* For ensuring that %rip-relative addressing is done right.  A read
   of %rip generates the address of the next instruction.  It may be
   that we don't conveniently know that inside disAMode().  For sanity
   checking, if the next insn %rip is needed, we make a guess at what
   it is, record that guess here, and set the accompanying Bool to
   indicate that -- after this insn's decode is finished -- that guess
   needs to be checked. */

/* At the start of each insn decode, is set to (0, False).
   After the decode, if _mustcheck is now True, _assumed is
   checked. */

static Addr64 guest_RIP_next_assumed;
static Bool   guest_RIP_next_mustcheck;
/*------------------------------------------------------------*/
/*--- Helpers for constructing IR.                         ---*/
/*------------------------------------------------------------*/

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}
/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

/* Generate a statement "dst := e". */
static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* mkU8 ( ULong i )
{
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkU16 ( ULong i )
{
   vassert(i < 0x10000ULL);
   return IRExpr_Const(IRConst_U16( (UShort)i ));
}

static IRExpr* mkU32 ( ULong i )
{
   vassert(i < 0x100000000ULL);
   return IRExpr_Const(IRConst_U32( (UInt)i ));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU ( IRType ty, ULong i )
{
   switch (ty) {
      case Ity_I8:  return mkU8(i);
      case Ity_I16: return mkU16(i);
      case Ity_I32: return mkU32(i);
      case Ity_I64: return mkU64(i);
      default: vpanic("mkU(amd64)");
   }
}
static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}
static IROp mkSizedOp ( IRType ty, IROp op8 )
{
   vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
           || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
           || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
           || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
           || op8 == Iop_CasCmpNE8
           || op8 == Iop_Not8);
   switch (ty) {
      case Ity_I8:  return 0 +op8;
      case Ity_I16: return 1 +op8;
      case Ity_I32: return 2 +op8;
      case Ity_I64: return 3 +op8;
      default: vpanic("mkSizedOp(amd64)");
   }
}
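/* Illustrative check only: mkSizedOp relies on the 8/16/32/64-bit
   variants of each operation being declared consecutively in
   libvex_ir.h, so (under that assumption) adding 2 or 3 to the 8-bit
   op yields the 32- or 64-bit form. */
#if 0
   vassert(mkSizedOp(Ity_I32, Iop_Add8) == Iop_Add32);
   vassert(mkSizedOp(Ity_I64, Iop_Sub8) == Iop_Sub64);
#endif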
static
IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
{
   if (szSmall == 1 && szBig == 4) {
      return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
   }
   if (szSmall == 1 && szBig == 2) {
      return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
   }
   if (szSmall == 2 && szBig == 4) {
      return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
   }
   if (szSmall == 1 && szBig == 8 && !signd) {
      return unop(Iop_8Uto64, src);
   }
   if (szSmall == 1 && szBig == 8 && signd) {
      return unop(Iop_8Sto64, src);
   }
   if (szSmall == 2 && szBig == 8 && !signd) {
      return unop(Iop_16Uto64, src);
   }
   if (szSmall == 2 && szBig == 8 && signd) {
      return unop(Iop_16Sto64, src);
   }
   vpanic("doScalarWidening(amd64)");
}
static
void putGuarded ( Int gstOffB, IRExpr* guard, IRExpr* value )
{
   IRType ty = typeOfIRExpr(irsb->tyenv, value);
   stmt( IRStmt_Put(gstOffB,
                    IRExpr_ITE(guard, value, IRExpr_Get(gstOffB, ty))) );
}
/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

/* Bomb out if we can't handle something. */
__attribute__ ((noreturn))
static void unimplemented ( const HChar* str )
{
   vex_printf("amd64toIR: unimplemented feature\n");
}

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)
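/* Illustrative usage sketch only: decode routines further down call
   DIP to emit front-end trace output when VEX_TRACE_FE is set.  The
   variables sz, pfx and modrm here stand for whatever the decoder has
   in scope at that point. */
#if 0
   DIP("mov %s,%s\n", nameIRegE(sz,pfx,modrm), nameIRegG(sz,pfx,modrm));
#endif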
/*------------------------------------------------------------*/
/*--- Offsets of various parts of the amd64 guest state.   ---*/
/*------------------------------------------------------------*/

#define OFFB_RAX       offsetof(VexGuestAMD64State,guest_RAX)
#define OFFB_RBX       offsetof(VexGuestAMD64State,guest_RBX)
#define OFFB_RCX       offsetof(VexGuestAMD64State,guest_RCX)
#define OFFB_RDX       offsetof(VexGuestAMD64State,guest_RDX)
#define OFFB_RSP       offsetof(VexGuestAMD64State,guest_RSP)
#define OFFB_RBP       offsetof(VexGuestAMD64State,guest_RBP)
#define OFFB_RSI       offsetof(VexGuestAMD64State,guest_RSI)
#define OFFB_RDI       offsetof(VexGuestAMD64State,guest_RDI)
#define OFFB_R8        offsetof(VexGuestAMD64State,guest_R8)
#define OFFB_R9        offsetof(VexGuestAMD64State,guest_R9)
#define OFFB_R10       offsetof(VexGuestAMD64State,guest_R10)
#define OFFB_R11       offsetof(VexGuestAMD64State,guest_R11)
#define OFFB_R12       offsetof(VexGuestAMD64State,guest_R12)
#define OFFB_R13       offsetof(VexGuestAMD64State,guest_R13)
#define OFFB_R14       offsetof(VexGuestAMD64State,guest_R14)
#define OFFB_R15       offsetof(VexGuestAMD64State,guest_R15)

#define OFFB_RIP       offsetof(VexGuestAMD64State,guest_RIP)

#define OFFB_FS_CONST  offsetof(VexGuestAMD64State,guest_FS_CONST)
#define OFFB_GS_CONST  offsetof(VexGuestAMD64State,guest_GS_CONST)

#define OFFB_CC_OP     offsetof(VexGuestAMD64State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestAMD64State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestAMD64State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestAMD64State,guest_CC_NDEP)

#define OFFB_FPREGS    offsetof(VexGuestAMD64State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestAMD64State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestAMD64State,guest_DFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestAMD64State,guest_ACFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestAMD64State,guest_IDFLAG)
#define OFFB_FTOP      offsetof(VexGuestAMD64State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestAMD64State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestAMD64State,guest_FPROUND)

#define OFFB_SSEROUND  offsetof(VexGuestAMD64State,guest_SSEROUND)
#define OFFB_YMM0      offsetof(VexGuestAMD64State,guest_YMM0)
#define OFFB_YMM1      offsetof(VexGuestAMD64State,guest_YMM1)
#define OFFB_YMM2      offsetof(VexGuestAMD64State,guest_YMM2)
#define OFFB_YMM3      offsetof(VexGuestAMD64State,guest_YMM3)
#define OFFB_YMM4      offsetof(VexGuestAMD64State,guest_YMM4)
#define OFFB_YMM5      offsetof(VexGuestAMD64State,guest_YMM5)
#define OFFB_YMM6      offsetof(VexGuestAMD64State,guest_YMM6)
#define OFFB_YMM7      offsetof(VexGuestAMD64State,guest_YMM7)
#define OFFB_YMM8      offsetof(VexGuestAMD64State,guest_YMM8)
#define OFFB_YMM9      offsetof(VexGuestAMD64State,guest_YMM9)
#define OFFB_YMM10     offsetof(VexGuestAMD64State,guest_YMM10)
#define OFFB_YMM11     offsetof(VexGuestAMD64State,guest_YMM11)
#define OFFB_YMM12     offsetof(VexGuestAMD64State,guest_YMM12)
#define OFFB_YMM13     offsetof(VexGuestAMD64State,guest_YMM13)
#define OFFB_YMM14     offsetof(VexGuestAMD64State,guest_YMM14)
#define OFFB_YMM15     offsetof(VexGuestAMD64State,guest_YMM15)
#define OFFB_YMM16     offsetof(VexGuestAMD64State,guest_YMM16)

#define OFFB_EMNOTE    offsetof(VexGuestAMD64State,guest_EMNOTE)
#define OFFB_CMSTART   offsetof(VexGuestAMD64State,guest_CMSTART)
#define OFFB_CMLEN     offsetof(VexGuestAMD64State,guest_CMLEN)

#define OFFB_NRADDR    offsetof(VexGuestAMD64State,guest_NRADDR)
/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- amd64 insn stream.                                   ---*/
/*------------------------------------------------------------*/

/* This is the AMD64 register encoding -- integer regs. */

/* This is the Intel register encoding -- segment regs. */
/* Various simple conversions */

static ULong extend_s_8to64 ( UChar x )
{
   return (ULong)((Long)(((ULong)x) << 56) >> 56);
}

static ULong extend_s_16to64 ( UShort x )
{
   return (ULong)((Long)(((ULong)x) << 48) >> 48);
}

static ULong extend_s_32to64 ( UInt x )
{
   return (ULong)((Long)(((ULong)x) << 32) >> 32);
}
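/* Illustrative check only: these helpers sign-extend by shifting the
   value to the top of a 64-bit word and arithmetically shifting it
   back down (the same assumption about signed right shifts that the
   code above makes). */
#if 0
   vassert(extend_s_8to64(0x80)    == 0xFFFFFFFFFFFFFF80ULL);
   vassert(extend_s_8to64(0x7F)    == 0x000000000000007FULL);
   vassert(extend_s_16to64(0x8000) == 0xFFFFFFFFFFFF8000ULL);
#endif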
/* Figure out whether the mod and rm parts of a modRM byte refer to a
   register or memory.  If so, the byte will have the form 11XXXYYY,
   where YYY is the register number. */
static Bool epartIsReg ( UChar mod_reg_rm )
{
   return toBool(0xC0 == (mod_reg_rm & 0xC0));
}

/* Extract the 'g' field from a modRM byte.  This only produces 3
   bits, which is not a complete register number.  You should avoid
   this function if at all possible. */
static Int gregLO3ofRM ( UChar mod_reg_rm )
{
   return (Int)( (mod_reg_rm >> 3) & 7 );
}

/* Ditto the 'e' field of a modRM byte. */
static Int eregLO3ofRM ( UChar mod_reg_rm )
{
   return (Int)(mod_reg_rm & 0x7);
}
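/* Illustrative check only: modRM byte 0xD8 is 11 011 000 in binary,
   i.e. a register-register form whose g field is 3 and whose e field
   is 0, before any REX extension is applied. */
#if 0
   vassert(epartIsReg(0xD8));
   vassert(gregLO3ofRM(0xD8) == 3);
   vassert(eregLO3ofRM(0xD8) == 0);
#endif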
/* Get an 8/16/32-bit unsigned value out of the insn stream. */

static inline UChar getUChar ( Long delta )
{
   UChar v = guest_code[delta+0];
   return v;
}

static UInt getUDisp16 ( Long delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v & 0xFFFF;
}

//.. static UInt getUDisp ( Int size, Long delta )
//.. {
//..    switch (size) {
//..       case 4: return getUDisp32(delta);
//..       case 2: return getUDisp16(delta);
//..       case 1: return getUChar(delta);
//..       default: vpanic("getUDisp(x86)");
//..    }
//..    return 0; /*notreached*/
//.. }
/* Get a byte value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp8 ( Long delta )
{
   return extend_s_8to64( guest_code[delta] );
}

/* Get a 16-bit value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp16 ( Long delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return extend_s_16to64( (UShort)v );
}

/* Get a 32-bit value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp32 ( Long delta )
{
   UInt v = guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return extend_s_32to64( v );
}
/* Get a 64-bit value out of the insn stream. */
static Long getDisp64 ( Long delta )
{
   ULong v = 0;
   v |= guest_code[delta+7]; v <<= 8;
   v |= guest_code[delta+6]; v <<= 8;
   v |= guest_code[delta+5]; v <<= 8;
   v |= guest_code[delta+4]; v <<= 8;
   v |= guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return (Long)v;
}
/* Note: because AMD64 doesn't allow 64-bit literals, it is an error
   if this is called with size==8.  Should not happen. */
static Long getSDisp ( Int size, Long delta )
{
   switch (size) {
      case 4: return getSDisp32(delta);
      case 2: return getSDisp16(delta);
      case 1: return getSDisp8(delta);
      default: vpanic("getSDisp(amd64)");
   }
}
static ULong mkSizeMask ( Int sz )
{
   switch (sz) {
      case 1: return 0x00000000000000FFULL;
      case 2: return 0x000000000000FFFFULL;
      case 4: return 0x00000000FFFFFFFFULL;
      case 8: return 0xFFFFFFFFFFFFFFFFULL;
      default: vpanic("mkSzMask(amd64)");
   }
}

static Int imin ( Int a, Int b )
{
   return (a < b) ? a : b;
}

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: vex_printf("\nszToITy(%d)\n", n);
               vpanic("szToITy(amd64)");
   }
}
/*------------------------------------------------------------*/
/*--- For dealing with prefixes.                           ---*/
/*------------------------------------------------------------*/

/* The idea is to pass around an int holding a bitmask summarising
   info from the prefixes seen on the current instruction, including
   info from the REX byte.  This info is used in various places, but
   most especially when making sense of register fields in
   instructions.

   The top 8 bits of the prefix are 0x55, just as a hacky way to
   ensure it really is a valid prefix.

   Things you can safely assume about a well-formed prefix:
   * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
   * if REX is not present then REXW,REXR,REXX,REXB will read
     as zero.
   * F2 and F3 will not both be 1.
*/

typedef UInt  Prefix;

#define PFX_ASO    (1<<0)     /* address-size override present (0x67) */
#define PFX_66     (1<<1)     /* operand-size override-to-16 present (0x66) */
#define PFX_REX    (1<<2)     /* REX byte present (0x40 to 0x4F) */
#define PFX_REXW   (1<<3)     /* REX W bit, if REX present, else 0 */
#define PFX_REXR   (1<<4)     /* REX R bit, if REX present, else 0 */
#define PFX_REXX   (1<<5)     /* REX X bit, if REX present, else 0 */
#define PFX_REXB   (1<<6)     /* REX B bit, if REX present, else 0 */
#define PFX_LOCK   (1<<7)     /* bus LOCK prefix present (0xF0) */
#define PFX_F2     (1<<8)     /* REPNE/REPNZ prefix present (0xF2) */
#define PFX_F3     (1<<9)     /* REP/REPE/REPZ prefix present (0xF3) */
#define PFX_CS     (1<<10)    /* CS segment prefix present (0x2E) */
#define PFX_DS     (1<<11)    /* DS segment prefix present (0x3E) */
#define PFX_ES     (1<<12)    /* ES segment prefix present (0x26) */
#define PFX_FS     (1<<13)    /* FS segment prefix present (0x64) */
#define PFX_GS     (1<<14)    /* GS segment prefix present (0x65) */
#define PFX_SS     (1<<15)    /* SS segment prefix present (0x36) */
#define PFX_VEX    (1<<16)    /* VEX prefix present (0xC4 or 0xC5) */
#define PFX_VEXL   (1<<17)    /* VEX L bit, if VEX present, else 0 */
/* The extra register field VEX.vvvv is encoded (after not-ing it) as
   PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit
   positions. */
#define PFX_VEXnV0 (1<<18)    /* ~VEX vvvv[0], if VEX present, else 0 */
#define PFX_VEXnV1 (1<<19)    /* ~VEX vvvv[1], if VEX present, else 0 */
#define PFX_VEXnV2 (1<<20)    /* ~VEX vvvv[2], if VEX present, else 0 */
#define PFX_VEXnV3 (1<<21)    /* ~VEX vvvv[3], if VEX present, else 0 */

#define PFX_EMPTY 0x55000000
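/* Illustrative sketch only: a well-formed prefix word always carries
   the 0x55 marker in its top byte, and the helper predicates that
   follow just test individual bits.  For example, a prefix recording
   only a REX byte with REX.W set would look like this. */
#if 0
   Prefix pfx = PFX_EMPTY | PFX_REX | PFX_REXW;
   vassert(IS_VALID_PFX(pfx));
   vassert(haveREX(pfx) && getRexW(pfx) == 1 && getRexB(pfx) == 0);
#endif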
static Bool IS_VALID_PFX ( Prefix pfx ) {
   return toBool((pfx & 0xFF000000) == PFX_EMPTY);
}

static Bool haveREX ( Prefix pfx ) {
   return toBool(pfx & PFX_REX);
}

static Int getRexW ( Prefix pfx ) {
   return (pfx & PFX_REXW) ? 1 : 0;
}
static Int getRexR ( Prefix pfx ) {
   return (pfx & PFX_REXR) ? 1 : 0;
}
static Int getRexX ( Prefix pfx ) {
   return (pfx & PFX_REXX) ? 1 : 0;
}
static Int getRexB ( Prefix pfx ) {
   return (pfx & PFX_REXB) ? 1 : 0;
}
/* Check a prefix doesn't have F2 or F3 set in it, since usually that
   completely changes what instruction it really is. */
static Bool haveF2orF3 ( Prefix pfx ) {
   return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
}
static Bool haveF2andF3 ( Prefix pfx ) {
   return toBool((pfx & (PFX_F2|PFX_F3)) == (PFX_F2|PFX_F3));
}
static Bool haveF2 ( Prefix pfx ) {
   return toBool((pfx & PFX_F2) > 0);
}
static Bool haveF3 ( Prefix pfx ) {
   return toBool((pfx & PFX_F3) > 0);
}

static Bool have66 ( Prefix pfx ) {
   return toBool((pfx & PFX_66) > 0);
}
static Bool haveASO ( Prefix pfx ) {
   return toBool((pfx & PFX_ASO) > 0);
}
static Bool haveLOCK ( Prefix pfx ) {
   return toBool((pfx & PFX_LOCK) > 0);
}
/* Return True iff pfx has 66 set and F2 and F3 clear */
static Bool have66noF2noF3 ( Prefix pfx )
{
   return toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
}

/* Return True iff pfx has F2 set and 66 and F3 clear */
static Bool haveF2no66noF3 ( Prefix pfx )
{
   return toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
}

/* Return True iff pfx has F3 set and 66 and F2 clear */
static Bool haveF3no66noF2 ( Prefix pfx )
{
   return toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
}

/* Return True iff pfx has F3 set and F2 clear */
static Bool haveF3noF2 ( Prefix pfx )
{
   return toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3);
}

/* Return True iff pfx has F2 set and F3 clear */
static Bool haveF2noF3 ( Prefix pfx )
{
   return toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2);
}

/* Return True iff pfx has F2 and F3 clear */
static Bool haveNoF2noF3 ( Prefix pfx )
{
   return toBool((pfx & (PFX_F2|PFX_F3)) == 0);
}

/* Return True iff pfx has 66, F2 and F3 clear */
static Bool haveNo66noF2noF3 ( Prefix pfx )
{
   return toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
}

/* Return True iff pfx has any of 66, F2 and F3 set */
static Bool have66orF2orF3 ( Prefix pfx )
{
   return toBool( ! haveNo66noF2noF3(pfx) );
}

/* Return True iff pfx has 66 or F3 set */
static Bool have66orF3 ( Prefix pfx )
{
   return toBool((pfx & (PFX_66|PFX_F3)) > 0);
}
/* Clear all the segment-override bits in a prefix. */
static Prefix clearSegBits ( Prefix p )
{
   return p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
}

/* Get the (inverted, hence back to "normal") VEX.vvvv field. */
static UInt getVexNvvvv ( Prefix pfx ) {
   UInt r = (UInt)pfx;
   r /= (UInt)PFX_VEXnV0; /* pray this turns into a shift */
   return r & 0xF;
}

static Bool haveVEX ( Prefix pfx ) {
   return toBool(pfx & PFX_VEX);
}

static Int getVexL ( Prefix pfx ) {
   return (pfx & PFX_VEXL) ? 1 : 0;
}
/*------------------------------------------------------------*/
/*--- For dealing with escapes                             ---*/
/*------------------------------------------------------------*/

/* Escapes come after the prefixes, but before the primary opcode
   byte.  They escape the primary opcode byte into a bigger space.
   The 0xF0000000 isn't significant, except so as to make it not
   overlap valid Prefix values, for sanity checking.
*/

typedef
   enum {
      ESC_NONE=0xF0000000, // none
      ESC_0F,              // 0F
      ESC_0F38,            // 0F 38
      ESC_0F3A             // 0F 3A
   }
   Escape;
/*------------------------------------------------------------*/
/*--- For dealing with integer registers                   ---*/
/*------------------------------------------------------------*/

/* This is somewhat complex.  The rules are:

   For 64, 32 and 16 bit register references, the e or g fields in the
   modrm bytes supply the low 3 bits of the register number.  The
   fourth (most-significant) bit of the register number is supplied by
   the REX byte, if it is present; else that bit is taken to be zero.

   The REX.R bit supplies the high bit corresponding to the g register
   field, and the REX.B bit supplies the high bit corresponding to the
   e register field (when the mod part of modrm indicates that modrm's
   e component refers to a register and not to memory).

   The REX.X bit supplies a high register bit for certain registers
   in SIB address modes, and is generally rarely used.

   For 8 bit register references, the presence of the REX byte itself
   has significance.  If there is no REX present, then the 3-bit
   number extracted from the modrm e or g field is treated as an index
   into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
   old x86 encoding scheme.

   But if there is a REX present, the register reference is
   interpreted in the same way as for 64/32/16-bit references: a high
   bit is extracted from REX, giving a 4-bit number, and the denoted
   register is the lowest 8 bits of the 16 integer registers denoted
   by the number.  In particular, values 4 through 7 of this sequence
   do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
   %rsp %rbp %rsi %rdi.

   The REX.W bit has no bearing at all on register numbers.  Instead
   its presence indicates that the operand size is to be overridden
   from its default value (32 bits) to 64 bits instead.  This is in
   the same fashion that an 0x66 prefix indicates the operand size is
   to be overridden from 32 bits down to 16 bits.  When both REX.W and
   0x66 are present there is a conflict, and REX.W takes precedence.

   Rather than try to handle this complexity using a single huge
   function, several smaller ones are provided.  The aim is to make it
   as difficult as possible to screw up register decoding in a subtle
   and hard-to-track-down way.

   Because these routines fish around in the host's memory (that is,
   in the guest state area) for sub-parts of guest registers, their
   correctness depends on the host's endianness.  So far these
   routines only work for little-endian hosts.  Those for which
   endianness is important have assertions to ensure sanity.
*/
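/* Illustrative note only, using the naming helper defined further
   down: with sz==1 and no REX byte, register number 4 denotes %ah
   (the "irregular" case); with a REX byte present the same number
   denotes %spl, the low 8 bits of %rsp. */
#if 0
   const HChar* withoutREX = nameIReg(1, 4, True/*irregular*/);   /* "%ah"  */
   const HChar* withREX    = nameIReg(1, 4, False/*!irregular*/); /* "%spl" */
#endif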
/* About the simplest question you can ask: where do the 64-bit
   integer registers live (in the guest state) ? */

static Int integerGuestReg64Offset ( UInt reg )
{
   switch (reg) {
      case R_RAX: return OFFB_RAX;
      case R_RCX: return OFFB_RCX;
      case R_RDX: return OFFB_RDX;
      case R_RBX: return OFFB_RBX;
      case R_RSP: return OFFB_RSP;
      case R_RBP: return OFFB_RBP;
      case R_RSI: return OFFB_RSI;
      case R_RDI: return OFFB_RDI;
      case R_R8:  return OFFB_R8;
      case R_R9:  return OFFB_R9;
      case R_R10: return OFFB_R10;
      case R_R11: return OFFB_R11;
      case R_R12: return OFFB_R12;
      case R_R13: return OFFB_R13;
      case R_R14: return OFFB_R14;
      case R_R15: return OFFB_R15;
      default: vpanic("integerGuestReg64Offset(amd64)");
   }
}
/* Produce the name of an integer register, for printing purposes.
   reg is a number in the range 0 .. 15 that has been generated from a
   3-bit reg-field number and a REX extension bit.  irregular denotes
   the case where sz==1 and no REX byte is present and where the denoted
   sub-register is bits 15:8 of the containing 64-bit register. */

static
const HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
{
   static const HChar* ireg64_names[16]
     = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
         "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
   static const HChar* ireg32_names[16]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
         "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
   static const HChar* ireg16_names[16]
     = { "%ax",  "%cx",  "%dx",  "%bx",  "%sp",  "%bp",  "%si",  "%di",
         "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
   static const HChar* ireg8_names[16]
     = { "%al",  "%cl",  "%dl",  "%bl",  "%spl", "%bpl", "%sil", "%dil",
         "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
   static const HChar* ireg8_irregular[4]
     = { "%ah", "%ch", "%dh", "%bh" };

   if (sz == 1) {
      if (irregular)
         vassert(reg >= 4 && reg < 8);
   } else {
      vassert(irregular == False);
   }

   switch (sz) {
      case 8: return ireg64_names[reg];
      case 4: return ireg32_names[reg];
      case 2: return ireg16_names[reg];
      case 1: if (irregular) {
                 vassert(reg >= 4 && reg < 8);
                 return ireg8_irregular[reg - 4];
              }
              return ireg8_names[reg];
      default: vpanic("nameIReg(amd64)");
   }
}
/* Using the same argument conventions as nameIReg, produce the
   guest state offset of an integer register. */

static
Int offsetIReg ( Int sz, UInt reg, Bool irregular )
{
   if (sz == 1) {
      if (irregular)
         vassert(reg >= 4 && reg < 8);
   } else {
      vassert(irregular == False);
   }

   /* Deal with irregular case -- sz==1 and no REX present */
   if (sz == 1 && irregular) {
      switch (reg) {
         case R_RSP: return 1+ OFFB_RAX;
         case R_RBP: return 1+ OFFB_RCX;
         case R_RSI: return 1+ OFFB_RDX;
         case R_RDI: return 1+ OFFB_RBX;
         default:    break; /* use the normal case */
      }
   }

   return integerGuestReg64Offset(reg);
}
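/* Note (added for clarity): the "1+" in the irregular cases above
   works because the host is asserted to be little-endian elsewhere in
   this file, so byte offset 1 within the 64-bit guest slot for %rax
   is exactly bits 15:8 of it, i.e. %ah; likewise for %ch, %dh and
   %bh. */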
/* Read the %CL register :: Ity_I8, for shift/rotate operations. */

static IRExpr* getIRegCL ( void )
{
   vassert(host_endness == VexEndnessLE);
   return unop(Iop_64to8, IRExpr_Get( OFFB_RCX, Ity_I64 ));
}

/* Write to the %AH register. */

static void putIRegAH ( IRExpr* e )
{
   vassert(host_endness == VexEndnessLE);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
   stmt( IRStmt_Put( OFFB_RAX+1, e ) );
}
/* Read/write various widths of %RAX, as it has various
   special-purpose uses. */

static const HChar* nameIRegRAX ( Int sz )
{
   switch (sz) {
      case 1: return "%al";
      case 2: return "%ax";
      case 4: return "%eax";
      case 8: return "%rax";
      default: vpanic("nameIRegRAX(amd64)");
   }
}

static IRExpr* getIRegRAX ( Int sz )
{
   vassert(host_endness == VexEndnessLE);
   switch (sz) {
      case 1: return unop(Iop_64to8,  IRExpr_Get( OFFB_RAX, Ity_I64 ));
      case 2: return unop(Iop_64to16, IRExpr_Get( OFFB_RAX, Ity_I64 ));
      case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
      case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
      default: vpanic("getIRegRAX(amd64)");
   }
}

static void putIRegRAX ( Int sz, IRExpr* e )
{
   IRType ty = typeOfIRExpr(irsb->tyenv, e);
   vassert(host_endness == VexEndnessLE);
   switch (sz) {
      case 8: vassert(ty == Ity_I64);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      case 4: vassert(ty == Ity_I32);
              stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
              break;
      case 2: vassert(ty == Ity_I16);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      case 1: vassert(ty == Ity_I8);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      default: vpanic("putIRegRAX(amd64)");
   }
}
/* Read/write various widths of %RDX, as it has various
   special-purpose uses. */

static const HChar* nameIRegRDX ( Int sz )
{
   switch (sz) {
      case 1: return "%dl";
      case 2: return "%dx";
      case 4: return "%edx";
      case 8: return "%rdx";
      default: vpanic("nameIRegRDX(amd64)");
   }
}

static IRExpr* getIRegRDX ( Int sz )
{
   vassert(host_endness == VexEndnessLE);
   switch (sz) {
      case 1: return unop(Iop_64to8,  IRExpr_Get( OFFB_RDX, Ity_I64 ));
      case 2: return unop(Iop_64to16, IRExpr_Get( OFFB_RDX, Ity_I64 ));
      case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
      case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
      default: vpanic("getIRegRDX(amd64)");
   }
}

static void putIRegRDX ( Int sz, IRExpr* e )
{
   vassert(host_endness == VexEndnessLE);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   switch (sz) {
      case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
              break;
      case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      default: vpanic("putIRegRDX(amd64)");
   }
}
/* Simplistic functions to deal with the integer registers as a
   straightforward bank of 16 64-bit regs. */

static IRExpr* getIReg64 ( UInt regno )
{
   return IRExpr_Get( integerGuestReg64Offset(regno),
                      Ity_I64 );
}

static void putIReg64 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
}

static const HChar* nameIReg64 ( UInt regno )
{
   return nameIReg( 8, regno, False );
}

/* Simplistic functions to deal with the lower halves of integer
   registers as a straightforward bank of 16 32-bit regs. */

static IRExpr* getIReg32 ( UInt regno )
{
   vassert(host_endness == VexEndnessLE);
   return unop(Iop_64to32,
               IRExpr_Get( integerGuestReg64Offset(regno),
                           Ity_I64 ));
}

static void putIReg32 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno),
                     unop(Iop_32Uto64,e) ) );
}

static const HChar* nameIReg32 ( UInt regno )
{
   return nameIReg( 4, regno, False );
}

/* Simplistic functions to deal with the lower quarters of integer
   registers as a straightforward bank of 16 16-bit regs. */

static IRExpr* getIReg16 ( UInt regno )
{
   vassert(host_endness == VexEndnessLE);
   return unop(Iop_64to16,
               IRExpr_Get( integerGuestReg64Offset(regno),
                           Ity_I64 ));
}

static void putIReg16 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno),
                     unop(Iop_16Uto64,e) ) );
}

static const HChar* nameIReg16 ( UInt regno )
{
   return nameIReg( 2, regno, False );
}
/* Sometimes what we know is a 3-bit register number, a REX byte, and
   which field of the REX byte is to be used to extend to a 4-bit
   number.  These functions cater for that situation.
*/
static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return getIReg64( lo3bits | (getRexX(pfx) << 3) );
}

static const HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
}
static const HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   UInt regNo = lo3bits | (getRexB(pfx) << 3);
   return nameIReg( sz, regNo,
                    toBool(sz==1 && !haveREX(pfx) && regNo >= 4 && regNo < 8));
}

static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   UInt regNo = (getRexB(pfx) << 3) | lo3bits;
   if (sz == 8) {
      return IRExpr_Get(
                offsetIReg( 8, regNo, False /*!irregular*/ ),
                Ity_I64 );
   }
   if (sz == 4) {
      return unop(Iop_64to32,
                  IRExpr_Get(
                     offsetIReg( 8, regNo, False /*!irregular*/ ),
                     Ity_I64 ));
   }
   if (sz == 2) {
      return unop(Iop_64to16,
                  IRExpr_Get(
                     offsetIReg( 8, regNo, False /*!irregular*/ ),
                     Ity_I64 ));
   }
   if (sz == 1) {
      Bool irregular = !haveREX(pfx) && regNo >= 4 && regNo < 8;
      if (irregular) {
         return IRExpr_Get(
                   offsetIReg( 1, regNo, True /*irregular*/ ),
                   Ity_I8 );
      } else {
         return unop(Iop_64to8,
                     IRExpr_Get(
                        offsetIReg( 8, regNo, False /*!irregular*/ ),
                        Ity_I64 ));
      }
   }
   vpanic("getIRegRexB");
}
static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   Bool irregular = sz == 1 && !haveREX(pfx) && lo3bits >= 4 && lo3bits < 8;
   stmt( IRStmt_Put(
            offsetIReg( sz, lo3bits | (getRexB(pfx) << 3), irregular ),
            sz==4 ? unop(Iop_32Uto64,e) : e
   ));
}
/* Functions for getting register numbers from modrm bytes and REX
   when we don't have to consider the complexities of integer subreg
   accesses.
*/

/* Extract the g reg field from a modRM byte, and augment it using the
   REX.R bit from the supplied REX byte.  The R bit usually is
   associated with the g register field.
*/
static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
{
   Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
   reg += (pfx & PFX_REXR) ? 8 : 0;
   return (UInt)reg;
}

/* Extract the e reg field from a modRM byte, and augment it using the
   REX.B bit from the supplied REX byte.  The B bit usually is
   associated with the e register field (when modrm indicates e is a
   register, that is).
*/
static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
{
   Int rm;
   vassert(epartIsReg(mod_reg_rm));
   rm = (Int)(mod_reg_rm & 0x7);
   rm += (pfx & PFX_REXB) ? 8 : 0;
   return (UInt)rm;
}
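/* Illustrative check only: with modRM byte 0xD8 (11 011 000) and a
   prefix whose REX.R bit is set, the g field decodes to register 11
   (%r11) while the e field stays at 0 (%rax); REX.B would extend the
   e field instead. */
#if 0
   Prefix pfx = PFX_EMPTY | PFX_REX | PFX_REXR;
   vassert(gregOfRexRM(pfx, 0xD8) == 11);
   vassert(eregOfRexRM(pfx, 0xD8) == 0);
#endif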
/* General functions for dealing with integer register access. */

/* Produce the guest state offset for a reference to the 'g' register
   field in a modrm byte, taking into account REX (or its absence),
   and the size of the access.
*/
static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(host_endness == VexEndnessLE);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   reg = gregOfRexRM( pfx, mod_reg_rm );
   Bool irregular = sz == 1 && !haveREX(pfx) && reg >= 4 && reg < 8;
   return offsetIReg( sz, reg, irregular );
}

static
IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 8) {
      return IRExpr_Get( offsetIRegG( 8, pfx, mod_reg_rm ), Ity_I64 );
   }
   if (sz == 4) {
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegG( 8, pfx, mod_reg_rm ), Ity_I64 ));
   }
   if (sz == 2) {
      return unop(Iop_64to16,
                  IRExpr_Get( offsetIRegG( 8, pfx, mod_reg_rm ), Ity_I64 ));
   }
   if (sz == 1) {
      UInt regNo     = gregOfRexRM( pfx, mod_reg_rm );
      Bool irregular = !haveREX(pfx) && regNo >= 4 && regNo < 8;
      if (irregular) {
         return IRExpr_Get( offsetIRegG( 1, pfx, mod_reg_rm ), Ity_I8 );
      } else {
         return unop(Iop_64to8,
                     IRExpr_Get( offsetIRegG( 8, pfx, mod_reg_rm ),
                                 Ity_I64 ));
      }
   }
   vpanic("getIRegG");
}
static
void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
}

static
const HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt regNo     = gregOfRexRM( pfx, mod_reg_rm );
   Bool irregular = sz == 1 && !haveREX(pfx) && regNo >= 4 && regNo < 8;
   return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm), irregular );
}
static
IRExpr* getIRegV ( Int sz, Prefix pfx )
{
   vassert(sz == 8 || sz == 4);
   if (sz == 4) {
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIReg( 8, getVexNvvvv(pfx), False ),
                              Ity_I64 ));
   } else if (sz == 2) {
      return unop(Iop_64to16,
                  IRExpr_Get( offsetIReg( 8, getVexNvvvv(pfx), False ),
                              Ity_I64 ));
   } else {
      return IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
                         szToITy(sz) );
   }
}

static
void putIRegV ( Int sz, Prefix pfx, IRExpr* e )
{
   vassert(sz == 8 || sz == 4);
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIReg( sz, getVexNvvvv(pfx), False ), e ) );
}

static
const HChar* nameIRegV ( Int sz, Prefix pfx )
{
   vassert(sz == 8 || sz == 4);
   return nameIReg( sz, getVexNvvvv(pfx), False );
}
/* Produce the guest state offset for a reference to the 'e' register
   field in a modrm byte, taking into account REX (or its absence),
   and the size of the access.  eregOfRexRM will assert if mod_reg_rm
   denotes a memory access rather than a register access.
*/
static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(host_endness == VexEndnessLE);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   reg = eregOfRexRM( pfx, mod_reg_rm );
   Bool irregular = sz == 1 && !haveREX(pfx) && (reg >= 4 && reg < 8);
   return offsetIReg( sz, reg, irregular );
}

static
IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 8) {
      return IRExpr_Get( offsetIRegE( 8, pfx, mod_reg_rm ), Ity_I64 );
   }
   if (sz == 4) {
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegE( 8, pfx, mod_reg_rm ), Ity_I64 ));
   }
   if (sz == 2) {
      return unop(Iop_64to16,
                  IRExpr_Get( offsetIRegE( 8, pfx, mod_reg_rm ), Ity_I64 ));
   }
   if (sz == 1) {
      UInt regNo     = eregOfRexRM( pfx, mod_reg_rm );
      Bool irregular = !haveREX(pfx) && regNo >= 4 && regNo < 8;
      if (irregular) {
         return IRExpr_Get( offsetIRegE( 1, pfx, mod_reg_rm ), Ity_I8 );
      } else {
         return unop(Iop_64to8,
                     IRExpr_Get( offsetIRegE( 8, pfx, mod_reg_rm ),
                                 Ity_I64 ));
      }
   }
   vpanic("getIRegE");
}

static
void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
}

static
const HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt regNo     = eregOfRexRM( pfx, mod_reg_rm );
   Bool irregular = sz == 1 && !haveREX(pfx) && regNo >= 4 && regNo < 8;
   return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm), irregular );
}
/*------------------------------------------------------------*/
/*--- For dealing with XMM registers                       ---*/
/*------------------------------------------------------------*/

static Int ymmGuestRegOffset ( UInt ymmreg )
{
   switch (ymmreg) {
      case 0:  return OFFB_YMM0;
      case 1:  return OFFB_YMM1;
      case 2:  return OFFB_YMM2;
      case 3:  return OFFB_YMM3;
      case 4:  return OFFB_YMM4;
      case 5:  return OFFB_YMM5;
      case 6:  return OFFB_YMM6;
      case 7:  return OFFB_YMM7;
      case 8:  return OFFB_YMM8;
      case 9:  return OFFB_YMM9;
      case 10: return OFFB_YMM10;
      case 11: return OFFB_YMM11;
      case 12: return OFFB_YMM12;
      case 13: return OFFB_YMM13;
      case 14: return OFFB_YMM14;
      case 15: return OFFB_YMM15;
      default: vpanic("ymmGuestRegOffset(amd64)");
   }
}
static Int xmmGuestRegOffset ( UInt xmmreg )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   return ymmGuestRegOffset( xmmreg );
}

/* Lanes of vector registers are always numbered from zero being the
   least significant lane (rightmost in the register). */

static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 8);
   return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
}

static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 4);
   return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
}

static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 2);
   return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
}

static Int ymmGuestRegLane128offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 2);
   return ymmGuestRegOffset( ymmreg ) + 16 * laneno;
}

static Int ymmGuestRegLane64offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 4);
   return ymmGuestRegOffset( ymmreg ) + 8 * laneno;
}

static Int ymmGuestRegLane32offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 8);
   return ymmGuestRegOffset( ymmreg ) + 4 * laneno;
}
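/* Illustrative check only: lane offsets are plain byte arithmetic on
   the little-endian guest state, e.g. 32-bit lane 3 of register 1
   lives 12 bytes into the %ymm1 slot, and the upper 128-bit lane of
   register 2 starts 16 bytes into the %ymm2 slot. */
#if 0
   vassert(xmmGuestRegLane32offset(1, 3)  == OFFB_YMM1 + 12);
   vassert(ymmGuestRegLane128offset(2, 1) == OFFB_YMM2 + 16);
#endif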
static IRExpr* getXMMReg ( UInt xmmreg )
{
   return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
}

static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
}

static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
}

static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
}

static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
}

static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
}

static void putXMMReg ( UInt xmmreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
}

static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}
static IRExpr* getYMMReg ( UInt xmmreg )
{
   return IRExpr_Get( ymmGuestRegOffset(xmmreg), Ity_V256 );
}

static IRExpr* getYMMRegLane128 ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane128offset(ymmreg,laneno), Ity_V128 );
}

static IRExpr* getYMMRegLane64F ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_F64 );
}

static IRExpr* getYMMRegLane64 ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_I64 );
}

static IRExpr* getYMMRegLane32F ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_F32 );
}

static IRExpr* getYMMRegLane32 ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_I32 );
}

static void putYMMReg ( UInt ymmreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V256);
   stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg), e ) );
}

static void putYMMRegLane128 ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane64F ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane64 ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane32F ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane32 ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
}
static IRExpr* mkV128 ( UShort mask )
{
   return IRExpr_Const(IRConst_V128(mask));
}

/* Write the low half of a YMM reg and zero out the upper half. */
static void putYMMRegLoAndZU ( UInt ymmreg, IRExpr* e )
{
   putYMMRegLane128( ymmreg, 0, e );
   putYMMRegLane128( ymmreg, 1, mkV128(0) );
}

static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
{
   vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
   vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
   return unop(Iop_64to1,
               binop(Iop_And64,
                     unop(Iop_1Uto64,x),
                     unop(Iop_1Uto64,y)));
}
/* Generate a compare-and-swap operation, operating on memory at
   'addr'.  The expected value is 'expVal' and the new value is
   'newVal'.  If the operation fails, then transfer control (with a
   no-redir jump (XXX no -- see comment at top of this file)) to
   'restart_point', which is presumably the address of the guest
   instruction again -- retrying, essentially. */
static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
                    Addr64 restart_point )
{
   IRCAS* cas;
   IRType tyE    = typeOfIRExpr(irsb->tyenv, expVal);
   IRType tyN    = typeOfIRExpr(irsb->tyenv, newVal);
   IRTemp oldTmp = newTemp(tyE);
   IRTemp expTmp = newTemp(tyE);
   vassert(tyE == tyN);
   vassert(tyE == Ity_I64 || tyE == Ity_I32
           || tyE == Ity_I16 || tyE == Ity_I8);
   assign(expTmp, expVal);
   cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
                  NULL, mkexpr(expTmp), NULL, newVal );
   stmt( IRStmt_CAS(cas) );
   stmt( IRStmt_Exit(
            binop( mkSizedOp(tyE,Iop_CasCmpNE8),
                   mkexpr(oldTmp), mkexpr(expTmp) ),
            Ijk_Boring, /*Ijk_NoRedir*/
            IRConst_U64( restart_point ),
            OFFB_RIP
         ));
}
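/* Illustrative usage sketch only (not a real decode path): a decoder
   handling a LOCK-prefixed read-modify-write of a byte at 'addr'
   would wrap the store up roughly like this, using the current
   instruction's address as the restart point.  The temporaries here
   are hypothetical. */
#if 0
   IRTemp addr = newTemp(Ity_I64);
   IRTemp oldv = newTemp(Ity_I8);
   IRTemp newv = newTemp(Ity_I8);
   /* ... assign addr; load oldv with loadLE; compute newv ... */
   casLE( mkexpr(addr), mkexpr(oldv)/*expected*/, mkexpr(newv)/*new*/,
          guest_RIP_curr_instr );
#endif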
/*------------------------------------------------------------*/
/*--- Helpers for %rflags.                                 ---*/
/*------------------------------------------------------------*/

/* -------------- Evaluating the flags-thunk. -------------- */

/* Build IR to calculate all the eflags from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64. */
static IRExpr* mk_amd64g_calculate_rflags_all ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I1. */
static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
{
   IRExpr** args
      = mkIRExprVec_5( mkU64(cond),
                       IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_condition", &amd64g_calculate_condition,
           args
        );
   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
   return unop(Iop_64to1, call);
}

/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression :: Ity_I64. */
static IRExpr* mk_amd64g_calculate_rflags_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
/* -------------- Building the flags-thunk. -------------- */

/* The machinery in this section builds the flag-thunk following a
   flag-setting operation.  Hence the various setFlags_* functions.
*/

static Bool isAddSub ( IROp op8 )
{
   return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
}

static Bool isLogic ( IROp op8 )
{
   return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
}

/* U-widen 1/8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      case Ity_I1:  return unop(Iop_1Uto64, e);
      default: vpanic("widenUto64");
   }
}

/* S-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenSto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Sto64, e);
      case Ity_I16: return unop(Iop_16Sto64, e);
      case Ity_I8:  return unop(Iop_8Sto64, e);
      default: vpanic("widenSto64");
   }
}
/* Narrow 8/16/32/64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
{
   IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
   if (src_ty == dst_ty)
      return e;
   if (src_ty == Ity_I32 && dst_ty == Ity_I16)
      return unop(Iop_32to16, e);
   if (src_ty == Ity_I32 && dst_ty == Ity_I8)
      return unop(Iop_32to8, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I32)
      return unop(Iop_64to32, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I16)
      return unop(Iop_64to16, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I8)
      return unop(Iop_64to8, e);

   vex_printf("\nsrc, dst tys are: ");
   vpanic("narrowTo(amd64)");
}
/* Set the flags thunk OP, DEP1 and DEP2 fields.  The supplied op is
   auto-sized up to the real op. */

static
void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }
   switch (op8) {
      case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
      case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1_DEP2(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
}
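/* Illustrative usage sketch only: after generating a 32-bit addition,
   a decoder records the flag thunk by handing the two (un-widened)
   arguments to setFlags_DEP1_DEP2.  The temporaries here are
   hypothetical. */
#if 0
   IRTemp argL = newTemp(Ity_I32);
   IRTemp argR = newTemp(Ity_I32);
   /* ... assign argL and argR from the operands ... */
   setFlags_DEP1_DEP2( Iop_Add8, argL, argR, Ity_I32 );
#endif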
/* Set the OP and DEP1 fields only, and write zero to DEP2. */

static
void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }
   switch (op8) {
      case Iop_And8:
      case Iop_Or8:
      case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
}
/* For shift operations, we put in the result and the undershifted
   result.  Except if the shift amount is zero, the thunk is left
   unchanged. */

static void setFlags_DEP1_DEP2_shift ( IROp    op64,
                                       IRTemp  res,
                                       IRTemp  resUS,
                                       IRType  ty,
                                       IRTemp  guard )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }

   vassert(guard);

   /* Both kinds of right shifts are handled by the same thunk
      operation. */
   switch (op64) {
      case Iop_Shr64:
      case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
      case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
      default:        ppIROp(op64);
                      vpanic("setFlags_DEP1_DEP2_shift(amd64)");
   }

   /* guard :: Ity_I8.  We need to convert it to I1. */
   IRTemp guardB = newTemp(Ity_I1);
   assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) );

   /* DEP1 contains the result, DEP2 contains the undershifted value. */
   stmt( IRStmt_Put( OFFB_CC_OP,
                     IRExpr_ITE( mkexpr(guardB),
                                 mkU64(ccOp),
                                 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     IRExpr_ITE( mkexpr(guardB),
                                 widenUto64(mkexpr(res)),
                                 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2,
                     IRExpr_ITE( mkexpr(guardB),
                                 widenUto64(mkexpr(resUS)),
                                 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP,
                     IRExpr_ITE( mkexpr(guardB),
                                 mkU64(0),
                                 IRExpr_Get(OFFB_CC_NDEP,Ity_I64) ) ));
}
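/* Editor's illustration (not from the original sources).  Parking the
   undershifted value in DEP2 lets the rflags helper recover the last
   bit shifted out -- the new C flag -- without needing the shift amount
   itself.  And because each field is written through an ITE on guardB,
   a shift by zero leaves the previous thunk, and hence the previous
   flags, completely untouched, which is what the architecture requires. */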
/* For the inc/dec case, we store in DEP1 the result value and in NDEP
   the former value of the carry flag, which unfortunately we have to
   compute. */

static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
{
   Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;

   switch (ty) {
      case Ity_I8:  ccOp += 0; break;
      case Ity_I16: ccOp += 1; break;
      case Ity_I32: ccOp += 2; break;
      case Ity_I64: ccOp += 3; break;
      default: vassert(0);
   }

   /* This has to come first, because calculating the C flag 
      may require reading all four thunk fields. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
}
/* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
   two arguments. */

static 
void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
{
   switch (ty) {
      case Ity_I8:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) );
         break;
      case Ity_I16:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) );
         break;
      case Ity_I32:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) );
         break;
      case Ity_I64:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) );
         break;
      default:
         vpanic("setFlags_MUL(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
}
/* -------------- Condition codes. -------------- */

/* Condition codes, using the AMD encoding. */

static const HChar* name_AMD64Condcode ( AMD64Condcode cond )
{
   switch (cond) {
      case AMD64CondO:      return "o";
      case AMD64CondNO:     return "no";
      case AMD64CondB:      return "b";
      case AMD64CondNB:     return "ae"; /*"nb";*/
      case AMD64CondZ:      return "e"; /*"z";*/
      case AMD64CondNZ:     return "ne"; /*"nz";*/
      case AMD64CondBE:     return "be";
      case AMD64CondNBE:    return "a"; /*"nbe";*/
      case AMD64CondS:      return "s";
      case AMD64CondNS:     return "ns";
      case AMD64CondP:      return "p";
      case AMD64CondNP:     return "np";
      case AMD64CondL:      return "l";
      case AMD64CondNL:     return "ge"; /*"nl";*/
      case AMD64CondLE:     return "le";
      case AMD64CondNLE:    return "g"; /*"nle";*/
      case AMD64CondAlways: return "ALWAYS";
      default: vpanic("name_AMD64Condcode");
   }
}
static 
AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode  cond,
                                          /*OUT*/Bool*   needInvert )
{
   vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
   if (cond & 1) {
      *needInvert = True;
      return cond-1;
   } else {
      *needInvert = False;
      return cond;
   }
}
/* -------------- Helpers for ADD/SUB with carry. -------------- */

/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
   appropriately.

   Optionally, generate a store for the 'tres' value.  This can either
   be a normal store, or it can be a cas-with-possible-failure style
   store:

   if taddr is IRTemp_INVALID, then no store is generated.

   if taddr is not IRTemp_INVALID, then a store (using taddr as
   the address) is generated:

     if texpVal is IRTemp_INVALID then a normal store is
     generated, and restart_point must be zero (it is irrelevant).

     if texpVal is not IRTemp_INVALID then a cas-style store is
     generated.  texpVal is the expected value, restart_point
     is the restart point if the store fails, and texpVal must
     have the same type as tres.
*/
static void helper_ADC ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
{
   ULong   thunkOp;
   IRType  ty    = szToITy(sz);
   IRTemp  oldc  = newTemp(Ity_I64);
   IRTemp  oldcn = newTemp(ty);
   IROp    plus  = mkSizedOp(ty, Iop_Add8);
   IROp    xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   switch (sz) {
      case 8:  thunkOp = AMD64G_CC_OP_ADCQ; break;
      case 4:  thunkOp = AMD64G_CC_OP_ADCL; break;
      case 2:  thunkOp = AMD64G_CC_OP_ADCW; break;
      case 1:  thunkOp = AMD64G_CC_OP_ADCB; break;
      default: vassert(0);
   }

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And64,
                       mk_amd64g_calculate_rflags_c(),
                       mkU64(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   assign( tres, binop(plus,
                       binop(plus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}
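/* Editor's note (added for exposition, not from the original sources).
   DEP2 is stored as ta2 XOR oldc rather than ta2 itself.  Since oldc is
   also parked in NDEP, this appears to let the rflags helper undo the
   XOR and recover both the second addend and the incoming carry from
   the thunk fields alone; helper_SBB and helper_ADCX_ADOX below use the
   same convention. */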
/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
   appropriately.  As with helper_ADC, possibly generate a store of
   the result -- see comments on helper_ADC for details.
*/
static void helper_SBB ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
{
   ULong   thunkOp;
   IRType  ty    = szToITy(sz);
   IRTemp  oldc  = newTemp(Ity_I64);
   IRTemp  oldcn = newTemp(ty);
   IROp    minus = mkSizedOp(ty, Iop_Sub8);
   IROp    xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   switch (sz) {
      case 8:  thunkOp = AMD64G_CC_OP_SBBQ; break;
      case 4:  thunkOp = AMD64G_CC_OP_SBBL; break;
      case 2:  thunkOp = AMD64G_CC_OP_SBBW; break;
      case 1:  thunkOp = AMD64G_CC_OP_SBBB; break;
      default: vassert(0);
   }

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And64,
                       mk_amd64g_calculate_rflags_c(),
                       mkU64(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   assign( tres, binop(minus,
                       binop(minus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}
/* Given ta1, ta2 and tres, compute tres = ADCX(ta1,ta2) or tres = ADOX(ta1,ta2)
   and set flags appropriately.
*/
static void helper_ADCX_ADOX ( Bool isADCX, Int sz,
                               IRTemp tres, IRTemp ta1, IRTemp ta2 )
{
   ULong   thunkOp;
   IRType  ty       = szToITy(sz);
   IRTemp  oldflags = newTemp(Ity_I64);
   IRTemp  oldOC    = newTemp(Ity_I64); // old O or C flag
   IRTemp  oldOCn   = newTemp(ty);      // old O or C flag, narrowed
   IROp    plus     = mkSizedOp(ty, Iop_Add8);
   IROp    xor      = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   switch (sz) {
      case 8:  thunkOp = isADCX ? AMD64G_CC_OP_ADCX64
                                : AMD64G_CC_OP_ADOX64; break;
      case 4:  thunkOp = isADCX ? AMD64G_CC_OP_ADCX32
                                : AMD64G_CC_OP_ADOX32; break;
      default: vassert(0);
   }

   assign( oldflags, mk_amd64g_calculate_rflags_all() );

   /* oldOC = old overflow/carry flag, 0 or 1 */
   assign( oldOC, binop(Iop_And64,
                        binop(Iop_Shr64,
                              mkexpr(oldflags),
                              mkU8(isADCX ? AMD64G_CC_SHIFT_C
                                          : AMD64G_CC_SHIFT_O)),
                        mkU64(1)) );

   assign( oldOCn, narrowTo(ty, mkexpr(oldOC)) );

   assign( tres, binop(plus,
                       binop(plus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldOCn)) );

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldOCn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldflags) ) );
}
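/* Editor's illustration (not from the original sources).  ADCX updates
   only the C flag and ADOX only the O flag, leaving the other arithmetic
   flags alone.  That is why the complete pre-instruction rflags value is
   parked in NDEP here: the rflags helper can merge the freshly computed
   C or O bit back into those saved flags. */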
/* -------------- Helpers for disassembly printing. -------------- */

static const HChar* nameGrp1 ( Int opc_aux )
{
   static const HChar* grp1_names[8] 
     = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
   return grp1_names[opc_aux];
}

static const HChar* nameGrp2 ( Int opc_aux )
{
   static const HChar* grp2_names[8] 
     = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
   return grp2_names[opc_aux];
}

static const HChar* nameGrp4 ( Int opc_aux )
{
   static const HChar* grp4_names[8] 
     = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
   if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
   return grp4_names[opc_aux];
}

static const HChar* nameGrp5 ( Int opc_aux )
{
   static const HChar* grp5_names[8] 
     = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
   if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
   return grp5_names[opc_aux];
}

static const HChar* nameGrp8 ( Int opc_aux )
{
   static const HChar* grp8_names[8] 
     = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
   if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
   return grp8_names[opc_aux];
}

static const HChar* nameSReg ( UInt sreg )
{
   switch (sreg) {
      case R_ES: return "%es";
      case R_CS: return "%cs";
      case R_SS: return "%ss";
      case R_DS: return "%ds";
      case R_FS: return "%fs";
      case R_GS: return "%gs";
      default: vpanic("nameSReg(amd64)");
   }
}

static const HChar* nameMMXReg ( Int mmxreg )
{
   static const HChar* mmx_names[8] 
     = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
   if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
   return mmx_names[mmxreg];
}

static const HChar* nameXMMReg ( Int xmmreg )
{
   static const HChar* xmm_names[16] 
     = { "%xmm0",  "%xmm1",  "%xmm2",  "%xmm3",
         "%xmm4",  "%xmm5",  "%xmm6",  "%xmm7",
         "%xmm8",  "%xmm9",  "%xmm10", "%xmm11",
         "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
   if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
   return xmm_names[xmmreg];
}

static const HChar* nameMMXGran ( Int gran )
{
   switch (gran) {
      case 0: return "b";
      case 1: return "w";
      case 2: return "l";
      case 3: return "q";
      default: vpanic("nameMMXGran(amd64,guest)");
   }
}

static HChar nameISize ( Int size )
{
   switch (size) {
      case 8: return 'q';
      case 4: return 'l';
      case 2: return 'w';
      case 1: return 'b';
      default: vpanic("nameISize(amd64)");
   }
}

static const HChar* nameYMMReg ( Int ymmreg )
{
   static const HChar* ymm_names[16] 
     = { "%ymm0",  "%ymm1",  "%ymm2",  "%ymm3",
         "%ymm4",  "%ymm5",  "%ymm6",  "%ymm7",
         "%ymm8",  "%ymm9",  "%ymm10", "%ymm11",
         "%ymm12", "%ymm13", "%ymm14", "%ymm15" };
   if (ymmreg < 0 || ymmreg > 15) vpanic("nameYMMReg(amd64)");
   return ymm_names[ymmreg];
}
/*------------------------------------------------------------*/
/*--- JMP helpers                                          ---*/
/*------------------------------------------------------------*/

static void jmp_lit( /*MOD*/DisResult* dres,
                     IRJumpKind kind, Addr64 d64 )
{
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = kind;
   stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) );
}

static void jmp_treg( /*MOD*/DisResult* dres,
                      IRJumpKind kind, IRTemp t )
{
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = kind;
   stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) );
}
static 
void jcc_01 ( /*MOD*/DisResult* dres,
              AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
{
   Bool          invert;
   AMD64Condcode condPos;
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = Ijk_Boring;
   condPos = positiveIse_AMD64Condcode ( cond, &invert );
   if (invert) {
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
                         Ijk_Boring,
                         IRConst_U64(d64_false),
                         OFFB_RIP ) );
      stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) );
   } else {
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
                         Ijk_Boring,
                         IRConst_U64(d64_true),
                         OFFB_RIP ) );
      stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) );
   }
}
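/* Worked example (added for exposition; not part of the original code).
   A guarded branch such as "je 0x4000" at address 0x3000 (a 2-byte
   instruction) becomes: a side-exit to 0x4000 taken when the Z condition
   holds, followed by an unconditional Put of 0x3002 (the fall-through
   address) to the guest RIP.  positiveIse_AMD64Condcode and the 'invert'
   flag merely ensure that the condition tested in the side-exit is
   always the positively-encoded one. */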
/* Let new_rsp be the %rsp value after a call/return.  Let nia be the
   guest address of the next instruction to be executed.

   This function generates an AbiHint to say that -128(%rsp)
   .. -1(%rsp) should now be regarded as uninitialised.
*/
static 
void make_redzone_AbiHint ( const VexAbiInfo* vbi,
                            IRTemp new_rsp, IRTemp nia, const HChar* who )
{
   Int szB = vbi->guest_stack_redzone_size;

   /* A bit of a kludge.  Currently the only ABI we've guested AMD64
      for is ELF.  So just check it's the expected 128 value
      (paranoia). */
   vassert(szB == 128);

   if (0) vex_printf("AbiHint: %s\n", who);

   vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, nia    ) == Ity_I64);

   stmt( IRStmt_AbiHint(
            binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)),
            szB,
            mkexpr(nia)
         ));
}
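/* Editor's illustration (not from the original sources).  This models
   the System V AMD64 "red zone": on a call or return, the 128 bytes just
   below the new %rsp no longer belong to any live frame, so tools such
   as Memcheck may treat them as undefined.  For example, after
   translating "ret", new_rsp is the popped-past %rsp and nia is the
   return address, and the hint covers new_rsp-128 .. new_rsp-1. */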
/*------------------------------------------------------------*/
/*--- Disassembling addressing modes                       ---*/
/*------------------------------------------------------------*/

static 
const HChar* segRegTxt ( Prefix pfx )
{
   if (pfx & PFX_CS) return "%cs:";
   if (pfx & PFX_DS) return "%ds:";
   if (pfx & PFX_ES) return "%es:";
   if (pfx & PFX_FS) return "%fs:";
   if (pfx & PFX_GS) return "%gs:";
   return ""; /* no override */
}
/* 'virtual' is an IRExpr* holding a virtual address.  Convert it to a
   linear address by adding any required segment override as indicated
   by sorb, and also dealing with any address size override
   present. */
static
IRExpr* handleAddrOverrides ( const VexAbiInfo* vbi,
                              Prefix pfx, IRExpr* virtual )
{
   /* --- address size override --- */
   if (haveASO(pfx))
      virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));

   /* Note that the below are hacks that rely on the assumption
      that %fs or %gs are constant.
      Typically, %fs is always 0x63 on linux (in the main thread, it
      stays at value 0), %gs always 0x60 on Darwin, ... */

   /* --- segment overrides --- */
   if (pfx & PFX_FS) {
      if (vbi->guest_amd64_assume_fs_is_const) {
         /* return virtual + guest_FS_CONST. */
         virtual = binop(Iop_Add64, virtual,
                         IRExpr_Get(OFFB_FS_CONST, Ity_I64));
      } else {
         unimplemented("amd64 %fs segment override");
      }
   }

   if (pfx & PFX_GS) {
      if (vbi->guest_amd64_assume_gs_is_const) {
         /* return virtual + guest_GS_CONST. */
         virtual = binop(Iop_Add64, virtual,
                         IRExpr_Get(OFFB_GS_CONST, Ity_I64));
      } else {
         unimplemented("amd64 %gs segment override");
      }
   }

   /* cs, ds, es and ss are simply ignored in 64-bit mode. */

   return virtual;
}
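/* Editor's illustration (not from the original sources).  On Linux the
   common user of this path is thread-local storage: an access such as
   "mov %fs:0x28, %rax" reaches here with PFX_FS set, and when the ABI
   says %fs has a fixed base, the guest state field guest_FS_CONST is
   simply added to the 64-bit virtual address computed from the amode. */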
2508 //.. IRType hWordTy;
2509 //.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
2512 //.. /* the common case - no override */
2513 //.. return virtual;
2515 //.. switch (sorb) {
2516 //.. case 0x3E: sreg = R_DS; break;
2517 //.. case 0x26: sreg = R_ES; break;
2518 //.. case 0x64: sreg = R_FS; break;
2519 //.. case 0x65: sreg = R_GS; break;
2520 //.. default: vpanic("handleAddrOverrides(x86,guest)");
2523 //.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
2525 //.. seg_selector = newTemp(Ity_I32);
2526 //.. ldt_ptr = newTemp(hWordTy);
2527 //.. gdt_ptr = newTemp(hWordTy);
2528 //.. r64 = newTemp(Ity_I64);
2530 //.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
2531 //.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
2532 //.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
2535 //.. Call this to do the translation and limit checks:
2536 //.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2537 //.. UInt seg_selector, UInt virtual_addr )
2544 //.. "x86g_use_seg_selector",
2545 //.. &x86g_use_seg_selector,
2546 //.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
2547 //.. mkexpr(seg_selector), virtual)
2551 //.. /* If the high 32 of the result are non-zero, there was a
2552 //.. failure in address translation. In which case, make a
2557 //.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
2559 //.. IRConst_U32( guest_eip_curr_instr )
2563 //.. /* otherwise, here's the translated result. */
2564 //.. return unop(Iop_64to32, mkexpr(r64));
/* Generate IR to calculate an address indicated by a ModRM and
   following SIB bytes.  The expression, and the number of bytes in
   the address mode, are returned (the latter in *len).  Note that
   this fn should not be called if the R/M part of the address denotes
   a register instead of memory.  If print_codegen is true, text of
   the addressing mode is placed in buf.

   The computed address is stored in a new tempreg, and the
   identity of the tempreg is returned.

   extra_bytes holds the number of bytes after the amode, as supplied
   by the caller.  This is needed to make sense of %rip-relative
   addresses.  Note that the value that *len is set to is only the
   length of the amode itself and does not include the value supplied
   in extra_bytes.
 */
static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
{
   IRTemp tmp = newTemp(Ity_I64);
   assign( tmp, addr64 );
   return tmp;
}
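/* Worked example (added for exposition; not part of the original code).
   As a concrete instance of what disAMode below has to decode: the bytes
   8B 44 24 08, i.e. "movl 8(%rsp),%eax", carry modrm 0x44 (mod=01,
   reg=000, rm=100), so rm=100 says "a SIB byte follows"; the SIB byte
   0x24 gives base=%rsp and index=none, and the trailing 0x08 is the
   8-bit displacement.  The amode is therefore 3 bytes long
   (modrm + SIB + disp8) and evaluates to %rsp + 8. */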
2593 IRTemp
disAMode ( /*OUT*/Int
* len
,
2594 const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
2595 /*OUT*/HChar
* buf
, Int extra_bytes
)
2597 UChar mod_reg_rm
= getUChar(delta
);
2601 vassert(extra_bytes
>= 0 && extra_bytes
< 10);
2603 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2604 jump table seems a bit excessive.
2606 mod_reg_rm
&= 0xC7; /* is now XX000YYY */
2607 mod_reg_rm
= toUChar(mod_reg_rm
| (mod_reg_rm
>> 3));
2608 /* is now XX0XXYYY */
2609 mod_reg_rm
&= 0x1F; /* is now 000XXYYY */
2610 switch (mod_reg_rm
) {
2612 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2613 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2615 case 0x00: case 0x01: case 0x02: case 0x03:
2616 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2617 { UChar rm
= toUChar(mod_reg_rm
& 7);
2618 DIS(buf
, "%s(%s)", segRegTxt(pfx
), nameIRegRexB(8,pfx
,rm
));
2620 return disAMode_copy2tmp(
2621 handleAddrOverrides(vbi
, pfx
, getIRegRexB(8,pfx
,rm
)));
2624 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2625 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2627 case 0x08: case 0x09: case 0x0A: case 0x0B:
2628 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2629 { UChar rm
= toUChar(mod_reg_rm
& 7);
2630 Long d
= getSDisp8(delta
);
2632 DIS(buf
, "%s(%s)", segRegTxt(pfx
), nameIRegRexB(8,pfx
,rm
));
2634 DIS(buf
, "%s%lld(%s)", segRegTxt(pfx
), d
, nameIRegRexB(8,pfx
,rm
));
2637 return disAMode_copy2tmp(
2638 handleAddrOverrides(vbi
, pfx
,
2639 binop(Iop_Add64
,getIRegRexB(8,pfx
,rm
),mkU64(d
))));
2642 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2643 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2645 case 0x10: case 0x11: case 0x12: case 0x13:
2646 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2647 { UChar rm
= toUChar(mod_reg_rm
& 7);
2648 Long d
= getSDisp32(delta
);
2649 DIS(buf
, "%s%lld(%s)", segRegTxt(pfx
), d
, nameIRegRexB(8,pfx
,rm
));
2651 return disAMode_copy2tmp(
2652 handleAddrOverrides(vbi
, pfx
,
2653 binop(Iop_Add64
,getIRegRexB(8,pfx
,rm
),mkU64(d
))));
2656 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2657 /* REX.B==1: a register, %r8 .. %r16. This shouldn't happen. */
2658 case 0x18: case 0x19: case 0x1A: case 0x1B:
2659 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2660 vpanic("disAMode(amd64): not an addr!");
2662 /* RIP + disp32. This assumes that guest_RIP_curr_instr is set
2663 correctly at the start of handling each instruction. */
2665 { Long d
= getSDisp32(delta
);
2667 DIS(buf
, "%s%lld(%%rip)", segRegTxt(pfx
), d
);
2668 /* We need to know the next instruction's start address.
2669 Try and figure out what it is, record the guess, and ask
2670 the top-level driver logic (bbToIR_AMD64) to check we
2671 guessed right, after the instruction is completely
2673 guest_RIP_next_mustcheck
= True
;
2674 guest_RIP_next_assumed
= guest_RIP_bbstart
2675 + delta
+4 + extra_bytes
;
2676 return disAMode_copy2tmp(
2677 handleAddrOverrides(vbi
, pfx
,
2678 binop(Iop_Add64
, mkU64(guest_RIP_next_assumed
),
2683 /* SIB, with no displacement. Special cases:
2684 -- %rsp cannot act as an index value.
2685 If index_r indicates %rsp, zero is used for the index.
2686 -- when mod is zero and base indicates RBP or R13, base is
2687 instead a 32-bit sign-extended literal.
2688 It's all madness, I tell you. Extract %index, %base and
2689 scale from the SIB byte. The value denoted is then:
2690 | %index == %RSP && (%base == %RBP || %base == %R13)
2691 = d32 following SIB byte
2692 | %index == %RSP && !(%base == %RBP || %base == %R13)
2694 | %index != %RSP && (%base == %RBP || %base == %R13)
2695 = d32 following SIB byte + (%index << scale)
2696 | %index != %RSP && !(%base == %RBP || %base == %R13)
2697 = %base + (%index << scale)
2699 UChar sib
= getUChar(delta
);
2700 UChar scale
= toUChar((sib
>> 6) & 3);
2701 UChar index_r
= toUChar((sib
>> 3) & 7);
2702 UChar base_r
= toUChar(sib
& 7);
2703 /* correct since #(R13) == 8 + #(RBP) */
2704 Bool base_is_BPor13
= toBool(base_r
== R_RBP
);
2705 Bool index_is_SP
= toBool(index_r
== R_RSP
&& 0==getRexX(pfx
));
2708 if ((!index_is_SP
) && (!base_is_BPor13
)) {
2710 DIS(buf
, "%s(%s,%s)", segRegTxt(pfx
),
2711 nameIRegRexB(8,pfx
,base_r
),
2712 nameIReg64rexX(pfx
,index_r
));
2714 DIS(buf
, "%s(%s,%s,%d)", segRegTxt(pfx
),
2715 nameIRegRexB(8,pfx
,base_r
),
2716 nameIReg64rexX(pfx
,index_r
), 1<<scale
);
2721 handleAddrOverrides(vbi
, pfx
,
2723 getIRegRexB(8,pfx
,base_r
),
2724 binop(Iop_Shl64
, getIReg64rexX(pfx
,index_r
),
2728 if ((!index_is_SP
) && base_is_BPor13
) {
2729 Long d
= getSDisp32(delta
);
2730 DIS(buf
, "%s%lld(,%s,%d)", segRegTxt(pfx
), d
,
2731 nameIReg64rexX(pfx
,index_r
), 1<<scale
);
2735 handleAddrOverrides(vbi
, pfx
,
2737 binop(Iop_Shl64
, getIReg64rexX(pfx
,index_r
),
2742 if (index_is_SP
&& (!base_is_BPor13
)) {
2743 DIS(buf
, "%s(%s)", segRegTxt(pfx
), nameIRegRexB(8,pfx
,base_r
));
2745 return disAMode_copy2tmp(
2746 handleAddrOverrides(vbi
, pfx
, getIRegRexB(8,pfx
,base_r
)));
2749 if (index_is_SP
&& base_is_BPor13
) {
2750 Long d
= getSDisp32(delta
);
2751 DIS(buf
, "%s%lld", segRegTxt(pfx
), d
);
2753 return disAMode_copy2tmp(
2754 handleAddrOverrides(vbi
, pfx
, mkU64(d
)));
2760 /* SIB, with 8-bit displacement. Special cases:
2761 -- %esp cannot act as an index value.
2762 If index_r indicates %esp, zero is used for the index.
2767 = d8 + %base + (%index << scale)
2770 UChar sib
= getUChar(delta
);
2771 UChar scale
= toUChar((sib
>> 6) & 3);
2772 UChar index_r
= toUChar((sib
>> 3) & 7);
2773 UChar base_r
= toUChar(sib
& 7);
2774 Long d
= getSDisp8(delta
+1);
2776 if (index_r
== R_RSP
&& 0==getRexX(pfx
)) {
2777 DIS(buf
, "%s%lld(%s)", segRegTxt(pfx
),
2778 d
, nameIRegRexB(8,pfx
,base_r
));
2780 return disAMode_copy2tmp(
2781 handleAddrOverrides(vbi
, pfx
,
2782 binop(Iop_Add64
, getIRegRexB(8,pfx
,base_r
), mkU64(d
)) ));
2785 DIS(buf
, "%s%lld(%s,%s)", segRegTxt(pfx
), d
,
2786 nameIRegRexB(8,pfx
,base_r
),
2787 nameIReg64rexX(pfx
,index_r
));
2789 DIS(buf
, "%s%lld(%s,%s,%d)", segRegTxt(pfx
), d
,
2790 nameIRegRexB(8,pfx
,base_r
),
2791 nameIReg64rexX(pfx
,index_r
), 1<<scale
);
2796 handleAddrOverrides(vbi
, pfx
,
2799 getIRegRexB(8,pfx
,base_r
),
2801 getIReg64rexX(pfx
,index_r
), mkU8(scale
))),
2804 vassert(0); /*NOTREACHED*/
2807 /* SIB, with 32-bit displacement. Special cases:
2808 -- %rsp cannot act as an index value.
2809 If index_r indicates %rsp, zero is used for the index.
2814 = d32 + %base + (%index << scale)
2817 UChar sib
= getUChar(delta
);
2818 UChar scale
= toUChar((sib
>> 6) & 3);
2819 UChar index_r
= toUChar((sib
>> 3) & 7);
2820 UChar base_r
= toUChar(sib
& 7);
2821 Long d
= getSDisp32(delta
+1);
2823 if (index_r
== R_RSP
&& 0==getRexX(pfx
)) {
2824 DIS(buf
, "%s%lld(%s)", segRegTxt(pfx
),
2825 d
, nameIRegRexB(8,pfx
,base_r
));
2827 return disAMode_copy2tmp(
2828 handleAddrOverrides(vbi
, pfx
,
2829 binop(Iop_Add64
, getIRegRexB(8,pfx
,base_r
), mkU64(d
)) ));
2832 DIS(buf
, "%s%lld(%s,%s)", segRegTxt(pfx
), d
,
2833 nameIRegRexB(8,pfx
,base_r
),
2834 nameIReg64rexX(pfx
,index_r
));
2836 DIS(buf
, "%s%lld(%s,%s,%d)", segRegTxt(pfx
), d
,
2837 nameIRegRexB(8,pfx
,base_r
),
2838 nameIReg64rexX(pfx
,index_r
), 1<<scale
);
2843 handleAddrOverrides(vbi
, pfx
,
2846 getIRegRexB(8,pfx
,base_r
),
2848 getIReg64rexX(pfx
,index_r
), mkU8(scale
))),
2851 vassert(0); /*NOTREACHED*/
2855 vpanic("disAMode(amd64)");
2856 return 0; /*notreached*/
2861 /* Similarly for VSIB addressing. This returns just the addend,
2862 and fills in *rI and *vscale with the register number of the vector
2863 index and its multiplicand. */
2865 IRTemp
disAVSIBMode ( /*OUT*/Int
* len
,
2866 const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
2867 /*OUT*/HChar
* buf
, /*OUT*/UInt
* rI
,
2868 IRType ty
, /*OUT*/Int
* vscale
)
2870 UChar mod_reg_rm
= getUChar(delta
);
2871 const HChar
*vindex
;
2877 if ((mod_reg_rm
& 7) != 4 || epartIsReg(mod_reg_rm
))
2878 return IRTemp_INVALID
;
2880 UChar sib
= getUChar(delta
+1);
2881 UChar scale
= toUChar((sib
>> 6) & 3);
2882 UChar index_r
= toUChar((sib
>> 3) & 7);
2883 UChar base_r
= toUChar(sib
& 7);
2885 /* correct since #(R13) == 8 + #(RBP) */
2886 Bool base_is_BPor13
= toBool(base_r
== R_RBP
);
2890 *rI
= index_r
| (getRexX(pfx
) << 3);
2892 vindex
= nameXMMReg(*rI
);
2894 vindex
= nameYMMReg(*rI
);
2897 switch (mod_reg_rm
>> 6) {
2899 if (base_is_BPor13
) {
2900 d
= getSDisp32(delta
);
2903 DIS(buf
, "%s%lld(,%s)", segRegTxt(pfx
), d
, vindex
);
2905 DIS(buf
, "%s%lld(,%s,%d)", segRegTxt(pfx
), d
, vindex
, 1<<scale
);
2907 return disAMode_copy2tmp( mkU64(d
) );
2910 DIS(buf
, "%s(%s,%s)", segRegTxt(pfx
),
2911 nameIRegRexB(8,pfx
,base_r
), vindex
);
2913 DIS(buf
, "%s(%s,%s,%d)", segRegTxt(pfx
),
2914 nameIRegRexB(8,pfx
,base_r
), vindex
, 1<<scale
);
2919 d
= getSDisp8(delta
);
2923 d
= getSDisp32(delta
);
2927 DIS(buf
, "%s%lld(%s,%s)", segRegTxt(pfx
), d
,
2928 nameIRegRexB(8,pfx
,base_r
), vindex
);
2930 DIS(buf
, "%s%lld(%s,%s,%d)", segRegTxt(pfx
), d
,
2931 nameIRegRexB(8,pfx
,base_r
), vindex
, 1<<scale
);
2937 return disAMode_copy2tmp( getIRegRexB(8,pfx
,base_r
) );
2938 return disAMode_copy2tmp( binop(Iop_Add64
, getIRegRexB(8,pfx
,base_r
),
2943 /* Figure out the number of (insn-stream) bytes constituting the amode
2944 beginning at delta. Is useful for getting hold of literals beyond
2945 the end of the amode before it has been disassembled. */
2947 static UInt
lengthAMode ( Prefix pfx
, Long delta
)
2949 UChar mod_reg_rm
= getUChar(delta
);
2952 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2953 jump table seems a bit excessive.
2955 mod_reg_rm
&= 0xC7; /* is now XX000YYY */
2956 mod_reg_rm
= toUChar(mod_reg_rm
| (mod_reg_rm
>> 3));
2957 /* is now XX0XXYYY */
2958 mod_reg_rm
&= 0x1F; /* is now 000XXYYY */
2959 switch (mod_reg_rm
) {
2961 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2962 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2964 case 0x00: case 0x01: case 0x02: case 0x03:
2965 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2968 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2969 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2971 case 0x08: case 0x09: case 0x0A: case 0x0B:
2972 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2975 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2976 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2978 case 0x10: case 0x11: case 0x12: case 0x13:
2979 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2982 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2983 /* REX.B==1: a register, %r8 .. %r16. This shouldn't happen. */
2984 /* Not an address, but still handled. */
2985 case 0x18: case 0x19: case 0x1A: case 0x1B:
2986 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2994 /* SIB, with no displacement. */
2995 UChar sib
= getUChar(delta
);
2996 UChar base_r
= toUChar(sib
& 7);
2997 /* correct since #(R13) == 8 + #(RBP) */
2998 Bool base_is_BPor13
= toBool(base_r
== R_RBP
);
3000 if (base_is_BPor13
) {
3007 /* SIB, with 8-bit displacement. */
3011 /* SIB, with 32-bit displacement. */
3016 vpanic("lengthAMode(amd64)");
3017 return 0; /*notreached*/
3022 /*------------------------------------------------------------*/
3023 /*--- Disassembling common idioms ---*/
3024 /*------------------------------------------------------------*/
3027 enum { WithFlagNone
=2, WithFlagCarry
, WithFlagCarryX
, WithFlagOverX
}
3030 /* Handle binary integer instructions of the form
3033 Is passed the a ptr to the modRM byte, the actual operation, and the
3034 data size. Returns the address advanced completely over this
3037 E(src) is reg-or-mem
3040 If E is reg, --> GET %G, tmp
3044 If E is mem and OP is not reversible,
3045 --> (getAddr E) -> tmpa
3051 If E is mem and OP is reversible
3052 --> (getAddr E) -> tmpa
3058 ULong
dis_op2_E_G ( const VexAbiInfo
* vbi
,
3065 const HChar
* t_amd64opc
)
3069 IRType ty
= szToITy(size
);
3070 IRTemp dst1
= newTemp(ty
);
3071 IRTemp src
= newTemp(ty
);
3072 IRTemp dst0
= newTemp(ty
);
3073 UChar rm
= getUChar(delta0
);
3074 IRTemp addr
= IRTemp_INVALID
;
3076 /* Stay sane -- check for valid (op8, flag, keep) combinations. */
3080 case WithFlagNone
: case WithFlagCarry
:
3081 case WithFlagCarryX
: case WithFlagOverX
:
3089 vassert(flag
== WithFlagNone
|| flag
== WithFlagCarry
);
3090 if (flag
== WithFlagCarry
) vassert(keep
);
3093 vassert(flag
== WithFlagNone
);
3095 case Iop_Or8
: case Iop_Xor8
:
3096 vassert(flag
== WithFlagNone
);
3103 if (epartIsReg(rm
)) {
3104 /* Specially handle XOR reg,reg, because that doesn't really
3105 depend on reg, and doing the obvious thing potentially
3106 generates a spurious value check failure due to the bogus
3107 dependency. Ditto SUB/SBB reg,reg. */
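/* Editor's illustration (not from the original sources).  E.g.
   "xorq %rax,%rax" always produces zero, so it is translated as a plain
   PUT of the constant 0 rather than as rax^rax; under Memcheck the
   latter would make the result's definedness depend on the old,
   possibly undefined, %rax and could raise a false
   uninitialised-value error. */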
3108 if ((op8
== Iop_Xor8
|| ((op8
== Iop_Sub8
) && keep
))
3109 && offsetIRegG(size
,pfx
,rm
) == offsetIRegE(size
,pfx
,rm
)) {
3110 putIRegG(size
,pfx
,rm
, mkU(ty
,0));
3113 assign( dst0
, getIRegG(size
,pfx
,rm
) );
3114 assign( src
, getIRegE(size
,pfx
,rm
) );
3116 if (op8
== Iop_Add8
&& flag
== WithFlagCarry
) {
3117 helper_ADC( size
, dst1
, dst0
, src
,
3118 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3119 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3121 if (op8
== Iop_Sub8
&& flag
== WithFlagCarry
) {
3122 helper_SBB( size
, dst1
, dst0
, src
,
3123 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3124 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3126 if (op8
== Iop_Add8
&& flag
== WithFlagCarryX
) {
3127 helper_ADCX_ADOX( True
/*isADCX*/, size
, dst1
, dst0
, src
);
3128 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3130 if (op8
== Iop_Add8
&& flag
== WithFlagOverX
) {
3131 helper_ADCX_ADOX( False
/*!isADCX*/, size
, dst1
, dst0
, src
);
3132 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3134 assign( dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
3136 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3138 setFlags_DEP1(op8
, dst1
, ty
);
3140 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3143 DIP("%s%c %s,%s\n", t_amd64opc
, nameISize(size
),
3144 nameIRegE(size
,pfx
,rm
),
3145 nameIRegG(size
,pfx
,rm
));
3148 /* E refers to memory */
3149 addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
3150 assign( dst0
, getIRegG(size
,pfx
,rm
) );
3151 assign( src
, loadLE(szToITy(size
), mkexpr(addr
)) );
3153 if (op8
== Iop_Add8
&& flag
== WithFlagCarry
) {
3154 helper_ADC( size
, dst1
, dst0
, src
,
3155 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3156 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3158 if (op8
== Iop_Sub8
&& flag
== WithFlagCarry
) {
3159 helper_SBB( size
, dst1
, dst0
, src
,
3160 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3161 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3163 if (op8
== Iop_Add8
&& flag
== WithFlagCarryX
) {
3164 helper_ADCX_ADOX( True
/*isADCX*/, size
, dst1
, dst0
, src
);
3165 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3167 if (op8
== Iop_Add8
&& flag
== WithFlagOverX
) {
3168 helper_ADCX_ADOX( False
/*!isADCX*/, size
, dst1
, dst0
, src
);
3169 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3171 assign( dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
3173 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3175 setFlags_DEP1(op8
, dst1
, ty
);
3177 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3180 DIP("%s%c %s,%s\n", t_amd64opc
, nameISize(size
),
3181 dis_buf
, nameIRegG(size
, pfx
, rm
));
3188 /* Handle binary integer instructions of the form
3191 Is passed the a ptr to the modRM byte, the actual operation, and the
3192 data size. Returns the address advanced completely over this
3196 E(dst) is reg-or-mem
3198 If E is reg, --> GET %E, tmp
3202 If E is mem, --> (getAddr E) -> tmpa
3208 ULong
dis_op2_G_E ( const VexAbiInfo
* vbi
,
3215 const HChar
* t_amd64opc
)
3219 IRType ty
= szToITy(size
);
3220 IRTemp dst1
= newTemp(ty
);
3221 IRTemp src
= newTemp(ty
);
3222 IRTemp dst0
= newTemp(ty
);
3223 UChar rm
= getUChar(delta0
);
3224 IRTemp addr
= IRTemp_INVALID
;
3226 /* Stay sane -- check for valid (op8, flag, keep) combinations. */
3229 vassert(flag
== WithFlagNone
|| flag
== WithFlagCarry
);
3233 vassert(flag
== WithFlagNone
|| flag
== WithFlagCarry
);
3234 if (flag
== WithFlagCarry
) vassert(keep
);
3236 case Iop_And8
: case Iop_Or8
: case Iop_Xor8
:
3237 vassert(flag
== WithFlagNone
);
3244 /* flag != WithFlagNone is only allowed for Add and Sub and indicates the
3245 intended operation is add-with-carry or subtract-with-borrow. */
3247 if (epartIsReg(rm
)) {
3248 /* Specially handle XOR reg,reg, because that doesn't really
3249 depend on reg, and doing the obvious thing potentially
3250 generates a spurious value check failure due to the bogus
3251 dependency. Ditto SUB/SBB reg,reg. */
3252 if ((op8
== Iop_Xor8
|| ((op8
== Iop_Sub8
) && keep
))
3253 && offsetIRegG(size
,pfx
,rm
) == offsetIRegE(size
,pfx
,rm
)) {
3254 putIRegE(size
,pfx
,rm
, mkU(ty
,0));
3257 assign(dst0
, getIRegE(size
,pfx
,rm
));
3258 assign(src
, getIRegG(size
,pfx
,rm
));
3260 if (op8
== Iop_Add8
&& flag
== WithFlagCarry
) {
3261 helper_ADC( size
, dst1
, dst0
, src
,
3262 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3263 putIRegE(size
, pfx
, rm
, mkexpr(dst1
));
3265 if (op8
== Iop_Sub8
&& flag
== WithFlagCarry
) {
3266 helper_SBB( size
, dst1
, dst0
, src
,
3267 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3268 putIRegE(size
, pfx
, rm
, mkexpr(dst1
));
3270 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
3272 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3274 setFlags_DEP1(op8
, dst1
, ty
);
3276 putIRegE(size
, pfx
, rm
, mkexpr(dst1
));
3279 DIP("%s%c %s,%s\n", t_amd64opc
, nameISize(size
),
3280 nameIRegG(size
,pfx
,rm
),
3281 nameIRegE(size
,pfx
,rm
));
3285 /* E refers to memory */
3287 addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
3288 assign(dst0
, loadLE(ty
,mkexpr(addr
)));
3289 assign(src
, getIRegG(size
,pfx
,rm
));
3291 if (op8
== Iop_Add8
&& flag
== WithFlagCarry
) {
3292 if (haveLOCK(pfx
)) {
3293 /* cas-style store */
3294 helper_ADC( size
, dst1
, dst0
, src
,
3295 /*store*/addr
, dst0
/*expVal*/, guest_RIP_curr_instr
);
3298 helper_ADC( size
, dst1
, dst0
, src
,
3299 /*store*/addr
, IRTemp_INVALID
, 0 );
3302 if (op8
== Iop_Sub8
&& flag
== WithFlagCarry
) {
3303 if (haveLOCK(pfx
)) {
3304 /* cas-style store */
3305 helper_SBB( size
, dst1
, dst0
, src
,
3306 /*store*/addr
, dst0
/*expVal*/, guest_RIP_curr_instr
);
3309 helper_SBB( size
, dst1
, dst0
, src
,
3310 /*store*/addr
, IRTemp_INVALID
, 0 );
3313 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
3315 if (haveLOCK(pfx
)) {
3316 if (0) vex_printf("locked case\n" );
3317 casLE( mkexpr(addr
),
3318 mkexpr(dst0
)/*expval*/,
3319 mkexpr(dst1
)/*newval*/, guest_RIP_curr_instr
);
3321 if (0) vex_printf("nonlocked case\n");
3322 storeLE(mkexpr(addr
), mkexpr(dst1
));
3326 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3328 setFlags_DEP1(op8
, dst1
, ty
);
3331 DIP("%s%c %s,%s\n", t_amd64opc
, nameISize(size
),
3332 nameIRegG(size
,pfx
,rm
), dis_buf
);
3338 /* Handle move instructions of the form
3341 Is passed the a ptr to the modRM byte, and the data size. Returns
3342 the address advanced completely over this instruction.
3344 E(src) is reg-or-mem
3347 If E is reg, --> GET %E, tmpv
3350 If E is mem --> (getAddr E) -> tmpa
3355 ULong
dis_mov_E_G ( const VexAbiInfo
* vbi
,
3361 UChar rm
= getUChar(delta0
);
3364 if (epartIsReg(rm
)) {
3365 putIRegG(size
, pfx
, rm
, getIRegE(size
, pfx
, rm
));
3366 DIP("mov%c %s,%s\n", nameISize(size
),
3367 nameIRegE(size
,pfx
,rm
),
3368 nameIRegG(size
,pfx
,rm
));
3372 /* E refers to memory */
3374 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
3375 putIRegG(size
, pfx
, rm
, loadLE(szToITy(size
), mkexpr(addr
)));
3376 DIP("mov%c %s,%s\n", nameISize(size
),
3378 nameIRegG(size
,pfx
,rm
));
3384 /* Handle move instructions of the form
3387 Is passed the a ptr to the modRM byte, and the data size. Returns
3388 the address advanced completely over this instruction.
3389 We have to decide here whether F2 or F3 are acceptable. F2 never is.
3392 E(dst) is reg-or-mem
3394 If E is reg, --> GET %G, tmp
3397 If E is mem, --> (getAddr E) -> tmpa
3402 ULong
dis_mov_G_E ( const VexAbiInfo
* vbi
,
3409 UChar rm
= getUChar(delta0
);
3414 if (epartIsReg(rm
)) {
3415 if (haveF2orF3(pfx
)) { *ok
= False
; return delta0
; }
3416 putIRegE(size
, pfx
, rm
, getIRegG(size
, pfx
, rm
));
3417 DIP("mov%c %s,%s\n", nameISize(size
),
3418 nameIRegG(size
,pfx
,rm
),
3419 nameIRegE(size
,pfx
,rm
));
3423 /* E refers to memory */
3425 if (haveF2(pfx
)) { *ok
= False
; return delta0
; }
3426 /* F3(XRELEASE) is acceptable, though. */
3427 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
3428 storeLE( mkexpr(addr
), getIRegG(size
, pfx
, rm
) );
3429 DIP("mov%c %s,%s\n", nameISize(size
),
3430 nameIRegG(size
,pfx
,rm
),
3437 /* op $immediate, AL/AX/EAX/RAX. */
3439 ULong
dis_op_imm_A ( Int size
,
3444 const HChar
* t_amd64opc
)
3446 Int size4
= imin(size
,4);
3447 IRType ty
= szToITy(size
);
3448 IRTemp dst0
= newTemp(ty
);
3449 IRTemp src
= newTemp(ty
);
3450 IRTemp dst1
= newTemp(ty
);
3451 Long lit
= getSDisp(size4
,delta
);
3452 assign(dst0
, getIRegRAX(size
));
3453 assign(src
, mkU(ty
,lit
& mkSizeMask(size
)));
3455 if (isAddSub(op8
) && !carrying
) {
3456 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
3457 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3462 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
3463 setFlags_DEP1(op8
, dst1
, ty
);
3466 if (op8
== Iop_Add8
&& carrying
) {
3467 helper_ADC( size
, dst1
, dst0
, src
,
3468 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3471 if (op8
== Iop_Sub8
&& carrying
) {
3472 helper_SBB( size
, dst1
, dst0
, src
,
3473 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3476 vpanic("dis_op_imm_A(amd64,guest)");
3479 putIRegRAX(size
, mkexpr(dst1
));
3481 DIP("%s%c $%lld, %s\n", t_amd64opc
, nameISize(size
),
3482 lit
, nameIRegRAX(size
));
3487 /* Sign- and Zero-extending moves. */
3489 ULong
dis_movx_E_G ( const VexAbiInfo
* vbi
,
3491 Long delta
, Int szs
, Int szd
, Bool sign_extend
)
3493 UChar rm
= getUChar(delta
);
3494 if (epartIsReg(rm
)) {
3495 putIRegG(szd
, pfx
, rm
,
3497 szs
,szd
,sign_extend
,
3498 getIRegE(szs
,pfx
,rm
)));
3499 DIP("mov%c%c%c %s,%s\n", sign_extend
? 's' : 'z',
3502 nameIRegE(szs
,pfx
,rm
),
3503 nameIRegG(szd
,pfx
,rm
));
3507 /* E refers to memory */
3511 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
3512 putIRegG(szd
, pfx
, rm
,
3514 szs
,szd
,sign_extend
,
3515 loadLE(szToITy(szs
),mkexpr(addr
))));
3516 DIP("mov%c%c%c %s,%s\n", sign_extend
? 's' : 'z',
3520 nameIRegG(szd
,pfx
,rm
));
3526 /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
3527 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */
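/* Editor's illustration (not from the original sources).  For the
   64-bit case this models "divq %rbx" style behaviour: the 128-bit
   dividend is RDX:RAX, Iop_DivModU128to64 (or the signed variant)
   yields a 128-bit value whose low half is the quotient and whose high
   half is the remainder, and those halves are written back to RAX and
   RDX respectively.  The narrower sizes follow the same pattern with
   correspondingly narrower DivMod ops. */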
3529 void codegen_div ( Int sz
, IRTemp t
, Bool signed_divide
)
3531 /* special-case the 64-bit case */
3533 IROp op
= signed_divide
? Iop_DivModS128to64
3534 : Iop_DivModU128to64
;
3535 IRTemp src128
= newTemp(Ity_I128
);
3536 IRTemp dst128
= newTemp(Ity_I128
);
3537 assign( src128
, binop(Iop_64HLto128
,
3539 getIReg64(R_RAX
)) );
3540 assign( dst128
, binop(op
, mkexpr(src128
), mkexpr(t
)) );
3541 putIReg64( R_RAX
, unop(Iop_128to64
,mkexpr(dst128
)) );
3542 putIReg64( R_RDX
, unop(Iop_128HIto64
,mkexpr(dst128
)) );
3544 IROp op
= signed_divide
? Iop_DivModS64to32
3545 : Iop_DivModU64to32
;
3546 IRTemp src64
= newTemp(Ity_I64
);
3547 IRTemp dst64
= newTemp(Ity_I64
);
3551 binop(Iop_32HLto64
, getIRegRDX(4), getIRegRAX(4)) );
3553 binop(op
, mkexpr(src64
), mkexpr(t
)) );
3554 putIRegRAX( 4, unop(Iop_64to32
,mkexpr(dst64
)) );
3555 putIRegRDX( 4, unop(Iop_64HIto32
,mkexpr(dst64
)) );
3558 IROp widen3264
= signed_divide
? Iop_32Sto64
: Iop_32Uto64
;
3559 IROp widen1632
= signed_divide
? Iop_16Sto32
: Iop_16Uto32
;
3560 assign( src64
, unop(widen3264
,
3564 assign( dst64
, binop(op
, mkexpr(src64
), unop(widen1632
,mkexpr(t
))) );
3565 putIRegRAX( 2, unop(Iop_32to16
,unop(Iop_64to32
,mkexpr(dst64
))) );
3566 putIRegRDX( 2, unop(Iop_32to16
,unop(Iop_64HIto32
,mkexpr(dst64
))) );
3570 IROp widen3264
= signed_divide
? Iop_32Sto64
: Iop_32Uto64
;
3571 IROp widen1632
= signed_divide
? Iop_16Sto32
: Iop_16Uto32
;
3572 IROp widen816
= signed_divide
? Iop_8Sto16
: Iop_8Uto16
;
3573 assign( src64
, unop(widen3264
,
3574 unop(widen1632
, getIRegRAX(2))) );
3576 binop(op
, mkexpr(src64
),
3577 unop(widen1632
, unop(widen816
, mkexpr(t
)))) );
3578 putIRegRAX( 1, unop(Iop_16to8
,
3580 unop(Iop_64to32
,mkexpr(dst64
)))) );
3581 putIRegAH( unop(Iop_16to8
,
3583 unop(Iop_64HIto32
,mkexpr(dst64
)))) );
3587 vpanic("codegen_div(amd64)");
3593 ULong
dis_Grp1 ( const VexAbiInfo
* vbi
,
3595 Long delta
, UChar modrm
,
3596 Int am_sz
, Int d_sz
, Int sz
, Long d64
)
3600 IRType ty
= szToITy(sz
);
3601 IRTemp dst1
= newTemp(ty
);
3602 IRTemp src
= newTemp(ty
);
3603 IRTemp dst0
= newTemp(ty
);
3604 IRTemp addr
= IRTemp_INVALID
;
3605 IROp op8
= Iop_INVALID
;
3606 ULong mask
= mkSizeMask(sz
);
3608 switch (gregLO3ofRM(modrm
)) {
3609 case 0: op8
= Iop_Add8
; break; case 1: op8
= Iop_Or8
; break;
3610 case 2: break; // ADC
3611 case 3: break; // SBB
3612 case 4: op8
= Iop_And8
; break; case 5: op8
= Iop_Sub8
; break;
3613 case 6: op8
= Iop_Xor8
; break; case 7: op8
= Iop_Sub8
; break;
3615 default: vpanic("dis_Grp1(amd64): unhandled case");
3618 if (epartIsReg(modrm
)) {
3619 vassert(am_sz
== 1);
3621 assign(dst0
, getIRegE(sz
,pfx
,modrm
));
3622 assign(src
, mkU(ty
,d64
& mask
));
3624 if (gregLO3ofRM(modrm
) == 2 /* ADC */) {
3625 helper_ADC( sz
, dst1
, dst0
, src
,
3626 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3628 if (gregLO3ofRM(modrm
) == 3 /* SBB */) {
3629 helper_SBB( sz
, dst1
, dst0
, src
,
3630 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3632 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
3634 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3636 setFlags_DEP1(op8
, dst1
, ty
);
3639 if (gregLO3ofRM(modrm
) < 7)
3640 putIRegE(sz
, pfx
, modrm
, mkexpr(dst1
));
3642 delta
+= (am_sz
+ d_sz
);
3643 DIP("%s%c $%lld, %s\n",
3644 nameGrp1(gregLO3ofRM(modrm
)), nameISize(sz
), d64
,
3645 nameIRegE(sz
,pfx
,modrm
));
3647 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, /*xtra*/d_sz
);
3649 assign(dst0
, loadLE(ty
,mkexpr(addr
)));
3650 assign(src
, mkU(ty
,d64
& mask
));
3652 if (gregLO3ofRM(modrm
) == 2 /* ADC */) {
3653 if (haveLOCK(pfx
)) {
3654 /* cas-style store */
3655 helper_ADC( sz
, dst1
, dst0
, src
,
3656 /*store*/addr
, dst0
/*expVal*/, guest_RIP_curr_instr
);
3659 helper_ADC( sz
, dst1
, dst0
, src
,
3660 /*store*/addr
, IRTemp_INVALID
, 0 );
3663 if (gregLO3ofRM(modrm
) == 3 /* SBB */) {
3664 if (haveLOCK(pfx
)) {
3665 /* cas-style store */
3666 helper_SBB( sz
, dst1
, dst0
, src
,
3667 /*store*/addr
, dst0
/*expVal*/, guest_RIP_curr_instr
);
3670 helper_SBB( sz
, dst1
, dst0
, src
,
3671 /*store*/addr
, IRTemp_INVALID
, 0 );
3674 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
3675 if (gregLO3ofRM(modrm
) < 7) {
3676 if (haveLOCK(pfx
)) {
3677 casLE( mkexpr(addr
), mkexpr(dst0
)/*expVal*/,
3678 mkexpr(dst1
)/*newVal*/,
3679 guest_RIP_curr_instr
);
3681 storeLE(mkexpr(addr
), mkexpr(dst1
));
3685 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3687 setFlags_DEP1(op8
, dst1
, ty
);
3690 delta
+= (len
+d_sz
);
3691 DIP("%s%c $%lld, %s\n",
3692 nameGrp1(gregLO3ofRM(modrm
)), nameISize(sz
),
3699 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed
3703 ULong
dis_Grp2 ( const VexAbiInfo
* vbi
,
3705 Long delta
, UChar modrm
,
3706 Int am_sz
, Int d_sz
, Int sz
, IRExpr
* shift_expr
,
3707 const HChar
* shift_expr_txt
, Bool
* decode_OK
)
3709 /* delta on entry points at the modrm byte. */
3712 Bool isShift
, isRotate
, isRotateC
;
3713 IRType ty
= szToITy(sz
);
3714 IRTemp dst0
= newTemp(ty
);
3715 IRTemp dst1
= newTemp(ty
);
3716 IRTemp addr
= IRTemp_INVALID
;
3720 vassert(sz
== 1 || sz
== 2 || sz
== 4 || sz
== 8);
3722 /* Put value to shift/rotate in dst0. */
3723 if (epartIsReg(modrm
)) {
3724 assign(dst0
, getIRegE(sz
, pfx
, modrm
));
3725 delta
+= (am_sz
+ d_sz
);
3727 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, /*xtra*/d_sz
);
3728 assign(dst0
, loadLE(ty
,mkexpr(addr
)));
3729 delta
+= len
+ d_sz
;
3733 switch (gregLO3ofRM(modrm
)) { case 4: case 5: case 6: case 7: isShift
= True
; }
3736 switch (gregLO3ofRM(modrm
)) { case 0: case 1: isRotate
= True
; }
3739 switch (gregLO3ofRM(modrm
)) { case 2: case 3: isRotateC
= True
; }
3741 if (!isShift
&& !isRotate
&& !isRotateC
) {
3743 vpanic("dis_Grp2(Reg): unhandled case(amd64)");
3747 /* Call a helper; this insn is so ridiculous it does not deserve
3748 better. One problem is, the helper has to calculate both the
3749 new value and the new flags. This is more than 64 bits, and
3750 there is no way to return more than 64 bits from the helper.
3751 Hence the crude and obvious solution is to call it twice,
3752 using the sign of the sz field to indicate whether it is the
3753 value or rflags result we want.
3755 Bool left
= toBool(gregLO3ofRM(modrm
) == 2);
3757 IRExpr
** argsRFLAGS
;
3759 IRTemp new_value
= newTemp(Ity_I64
);
3760 IRTemp new_rflags
= newTemp(Ity_I64
);
3761 IRTemp old_rflags
= newTemp(Ity_I64
);
3763 assign( old_rflags
, widenUto64(mk_amd64g_calculate_rflags_all()) );
3766 = mkIRExprVec_4( widenUto64(mkexpr(dst0
)), /* thing to rotate */
3767 widenUto64(shift_expr
), /* rotate amount */
3774 left
? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3775 left
? &amd64g_calculate_RCL
: &amd64g_calculate_RCR
,
3781 = mkIRExprVec_4( widenUto64(mkexpr(dst0
)), /* thing to rotate */
3782 widenUto64(shift_expr
), /* rotate amount */
3789 left
? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3790 left
? &amd64g_calculate_RCL
: &amd64g_calculate_RCR
,
3795 assign( dst1
, narrowTo(ty
, mkexpr(new_value
)) );
3796 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
3797 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(new_rflags
) ));
3798 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
3799 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
3805 IRTemp pre64
= newTemp(Ity_I64
);
3806 IRTemp res64
= newTemp(Ity_I64
);
3807 IRTemp res64ss
= newTemp(Ity_I64
);
3808 IRTemp shift_amt
= newTemp(Ity_I8
);
3809 UChar mask
= toUChar(sz
==8 ? 63 : 31);
3812 switch (gregLO3ofRM(modrm
)) {
3813 case 4: op64
= Iop_Shl64
; break;
3814 case 5: op64
= Iop_Shr64
; break;
3815 case 6: op64
= Iop_Shl64
; break;
3816 case 7: op64
= Iop_Sar64
; break;
3818 default: vpanic("dis_Grp2:shift"); break;
/* Widen the value to be shifted to 64 bits, do the shift, and
   narrow back down.  This seems surprisingly long-winded, but
   unfortunately the AMD semantics requires that 8/16/32-bit
   shifts give defined results for shift values all the way up
   to 32, and this seems the simplest way to do it.  It has the
   advantage that the only IR level shifts generated are of 64
   bit values, and the shift amount is guaranteed to be in the
   range 0 .. 63, thereby observing the IR semantics requiring
   all shift values to be in the range 0 .. 2^word_size-1.

   Therefore the shift amount is masked with 63 for 64-bit shifts
   and 31 for all others.
*/
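/* Worked example (added for exposition; not part of the original code).
   For "shlb $5, %al": dst0 is the 8-bit value in %al, pre64 is that
   value zero-widened to 64 bits, the shift is done as a 64-bit
   Iop_Shl64 by 5 & 31, and dst1 is the low 8 bits of the result.
   Because the real work happens at 64 bits, a count of, say, 17 on an
   8-bit operand still yields the architecturally defined result (zero)
   rather than an out-of-range IR shift. */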
3834 /* shift_amt = shift_expr & MASK, regardless of operation size */
3835 assign( shift_amt
, binop(Iop_And8
, shift_expr
, mkU8(mask
)) );
3837 /* suitably widen the value to be shifted to 64 bits. */
3838 assign( pre64
, op64
==Iop_Sar64
? widenSto64(mkexpr(dst0
))
3839 : widenUto64(mkexpr(dst0
)) );
3841 /* res64 = pre64 `shift` shift_amt */
3842 assign( res64
, binop(op64
, mkexpr(pre64
), mkexpr(shift_amt
)) );
3844 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
3850 mkexpr(shift_amt
), mkU8(1)),
3853 /* Build the flags thunk. */
3854 setFlags_DEP1_DEP2_shift(op64
, res64
, res64ss
, ty
, shift_amt
);
3856 /* Narrow the result back down. */
3857 assign( dst1
, narrowTo(ty
, mkexpr(res64
)) );
3859 } /* if (isShift) */
3863 Int ccOp
= ty
==Ity_I8
? 0 : (ty
==Ity_I16
? 1
3864 : (ty
==Ity_I32
? 2 : 3));
3865 Bool left
= toBool(gregLO3ofRM(modrm
) == 0);
3866 IRTemp rot_amt
= newTemp(Ity_I8
);
3867 IRTemp rot_amt64
= newTemp(Ity_I8
);
3868 IRTemp oldFlags
= newTemp(Ity_I64
);
3869 UChar mask
= toUChar(sz
==8 ? 63 : 31);
3871 /* rot_amt = shift_expr & mask */
3872 /* By masking the rotate amount thusly, the IR-level Shl/Shr
3873 expressions never shift beyond the word size and thus remain
3875 assign(rot_amt64
, binop(Iop_And8
, shift_expr
, mkU8(mask
)));
3878 assign(rot_amt
, mkexpr(rot_amt64
));
3880 assign(rot_amt
, binop(Iop_And8
, mkexpr(rot_amt64
), mkU8(8*sz
-1)));
3884 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
3886 binop( mkSizedOp(ty
,Iop_Or8
),
3887 binop( mkSizedOp(ty
,Iop_Shl8
),
3891 binop( mkSizedOp(ty
,Iop_Shr8
),
3893 binop(Iop_Sub8
,mkU8(8*sz
), mkexpr(rot_amt
))
3897 ccOp
+= AMD64G_CC_OP_ROLB
;
3899 } else { /* right */
3901 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
3903 binop( mkSizedOp(ty
,Iop_Or8
),
3904 binop( mkSizedOp(ty
,Iop_Shr8
),
3908 binop( mkSizedOp(ty
,Iop_Shl8
),
3910 binop(Iop_Sub8
,mkU8(8*sz
), mkexpr(rot_amt
))
3914 ccOp
+= AMD64G_CC_OP_RORB
;
3918 /* dst1 now holds the rotated value. Build flag thunk. We
3919 need the resulting value for this, and the previous flags.
3920 Except don't set it if the rotate count is zero. */
3922 assign(oldFlags
, mk_amd64g_calculate_rflags_all());
3924 /* rot_amt64 :: Ity_I8. We need to convert it to I1. */
3925 IRTemp rot_amt64b
= newTemp(Ity_I1
);
3926 assign(rot_amt64b
, binop(Iop_CmpNE8
, mkexpr(rot_amt64
), mkU8(0)) );
3928 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
3929 stmt( IRStmt_Put( OFFB_CC_OP
,
3930 IRExpr_ITE( mkexpr(rot_amt64b
),
3932 IRExpr_Get(OFFB_CC_OP
,Ity_I64
) ) ));
3933 stmt( IRStmt_Put( OFFB_CC_DEP1
,
3934 IRExpr_ITE( mkexpr(rot_amt64b
),
3935 widenUto64(mkexpr(dst1
)),
3936 IRExpr_Get(OFFB_CC_DEP1
,Ity_I64
) ) ));
3937 stmt( IRStmt_Put( OFFB_CC_DEP2
,
3938 IRExpr_ITE( mkexpr(rot_amt64b
),
3940 IRExpr_Get(OFFB_CC_DEP2
,Ity_I64
) ) ));
3941 stmt( IRStmt_Put( OFFB_CC_NDEP
,
3942 IRExpr_ITE( mkexpr(rot_amt64b
),
3944 IRExpr_Get(OFFB_CC_NDEP
,Ity_I64
) ) ));
3945 } /* if (isRotate) */
3947 /* Save result, and finish up. */
3948 if (epartIsReg(modrm
)) {
3949 putIRegE(sz
, pfx
, modrm
, mkexpr(dst1
));
3950 if (vex_traceflags
& VEX_TRACE_FE
) {
3952 nameGrp2(gregLO3ofRM(modrm
)), nameISize(sz
) );
3954 vex_printf("%s", shift_expr_txt
);
3956 ppIRExpr(shift_expr
);
3957 vex_printf(", %s\n", nameIRegE(sz
,pfx
,modrm
));
3960 storeLE(mkexpr(addr
), mkexpr(dst1
));
3961 if (vex_traceflags
& VEX_TRACE_FE
) {
3963 nameGrp2(gregLO3ofRM(modrm
)), nameISize(sz
) );
3965 vex_printf("%s", shift_expr_txt
);
3967 ppIRExpr(shift_expr
);
3968 vex_printf(", %s\n", dis_buf
);
3975 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
3977 ULong
dis_Grp8_Imm ( const VexAbiInfo
* vbi
,
3979 Long delta
, UChar modrm
,
3980 Int am_sz
, Int sz
, ULong src_val
,
3983 /* src_val denotes a d8.
3984 And delta on entry points at the modrm byte. */
3986 IRType ty
= szToITy(sz
);
3987 IRTemp t2
= newTemp(Ity_I64
);
3988 IRTemp t2m
= newTemp(Ity_I64
);
3989 IRTemp t_addr
= IRTemp_INVALID
;
3993 /* we're optimists :-) */
3996 /* Check whether F2 or F3 are acceptable. */
3997 if (epartIsReg(modrm
)) {
3998 /* F2 or F3 are not allowed in the register case. */
3999 if (haveF2orF3(pfx
)) {
4004 /* F2 or F3 (but not both) are allowable provided LOCK is also
4006 if (haveF2orF3(pfx
)) {
4007 if (haveF2andF3(pfx
) || !haveLOCK(pfx
)) {
4014 /* Limit src_val -- the bit offset -- to something within a word.
4015 The Intel docs say that literal offsets larger than a word are
4016 masked in this way. */
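/* Editor's illustration (not from the original sources).  So for a
   64-bit operand, "btq $70, %rax" behaves exactly like "btq $6, %rax":
   the literal bit offset is reduced modulo the operand width
   (70 & 63 == 6) before the mask below is built. */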
4018 case 2: src_val
&= 15; break;
4019 case 4: src_val
&= 31; break;
4020 case 8: src_val
&= 63; break;
4021 default: *decode_OK
= False
; return delta
;
4024 /* Invent a mask suitable for the operation. */
4025 switch (gregLO3ofRM(modrm
)) {
4026 case 4: /* BT */ mask
= 0; break;
4027 case 5: /* BTS */ mask
= 1ULL << src_val
; break;
4028 case 6: /* BTR */ mask
= ~(1ULL << src_val
); break;
4029 case 7: /* BTC */ mask
= 1ULL << src_val
; break;
4030 /* If this needs to be extended, probably simplest to make a
4031 new function to handle the other cases (0 .. 3). The
4032 Intel docs do however not indicate any use for 0 .. 3, so
4033 we don't expect this to happen. */
4034 default: *decode_OK
= False
; return delta
;
4037 /* Fetch the value to be tested and modified into t2, which is
4038 64-bits wide regardless of sz. */
4039 if (epartIsReg(modrm
)) {
4040 vassert(am_sz
== 1);
4041 assign( t2
, widenUto64(getIRegE(sz
, pfx
, modrm
)) );
4042 delta
+= (am_sz
+ 1);
4043 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm
)),
4045 src_val
, nameIRegE(sz
,pfx
,modrm
));
4048 t_addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, 1 );
4050 assign( t2
, widenUto64(loadLE(ty
, mkexpr(t_addr
))) );
4051 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm
)),
4056 /* Compute the new value into t2m, if non-BT. */
4057 switch (gregLO3ofRM(modrm
)) {
4061 assign( t2m
, binop(Iop_Or64
, mkU64(mask
), mkexpr(t2
)) );
4064 assign( t2m
, binop(Iop_And64
, mkU64(mask
), mkexpr(t2
)) );
4067 assign( t2m
, binop(Iop_Xor64
, mkU64(mask
), mkexpr(t2
)) );
4070 /*NOTREACHED*/ /*the previous switch guards this*/
4074 /* Write the result back, if non-BT. */
4075 if (gregLO3ofRM(modrm
) != 4 /* BT */) {
4076 if (epartIsReg(modrm
)) {
4077 putIRegE(sz
, pfx
, modrm
, narrowTo(ty
, mkexpr(t2m
)));
4079 if (haveLOCK(pfx
)) {
4080 casLE( mkexpr(t_addr
),
4081 narrowTo(ty
, mkexpr(t2
))/*expd*/,
4082 narrowTo(ty
, mkexpr(t2m
))/*new*/,
4083 guest_RIP_curr_instr
);
4085 storeLE(mkexpr(t_addr
), narrowTo(ty
, mkexpr(t2m
)));
4090 /* Copy relevant bit from t2 into the carry flag. */
4091 /* Flags: C=selected bit, O,S,A,P undefined, Z unchanged */
4092 /* so let's also keep O,S,A,P unchanged */
4093 const ULong maskC
= AMD64G_CC_MASK_C
;
4094 const ULong maskOSZAP
= AMD64G_CC_MASK_O
| AMD64G_CC_MASK_S
4095 | AMD64G_CC_MASK_Z
| AMD64G_CC_MASK_A
4098 IRTemp old_rflags
= newTemp(Ity_I64
);
4099 assign(old_rflags
, mk_amd64g_calculate_rflags_all());
4101 IRTemp new_rflags
= newTemp(Ity_I64
);
4104 binop(Iop_And64
, mkexpr(old_rflags
), mkU64(maskOSZAP
)),
4106 binop(Iop_Shr64
, mkexpr(t2
), mkU8(src_val
)),
4109 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
4110 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
4111 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(new_rflags
) ));
4112 /* Set NDEP even though it isn't used. This makes redundant-PUT
4113 elimination of previous stores to this field work better. */
4114 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
/* Signed/unsigned widening multiply.  Generate IR to multiply the
   value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
   RDX:RAX/EDX:EAX/DX:AX/AX.
*/
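/* Editor's illustration (not from the original sources).  E.g.
   "mulq %rbx" computes the full 128-bit product RAX * RBX; the code
   below does this with Iop_MullU64 and then writes the high half to
   RDX and the low half to RAX.  The one asymmetric case is the 8-bit
   multiply, whose 16-bit result lands entirely in AX rather than
   being split across two registers. */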
4124 static void codegen_mulL_A_D ( Int sz
, Bool syned
,
4125 IRTemp tmp
, const HChar
* tmp_txt
)
4127 IRType ty
= szToITy(sz
);
4128 IRTemp t1
= newTemp(ty
);
4130 assign( t1
, getIRegRAX(sz
) );
4134 IRTemp res128
= newTemp(Ity_I128
);
4135 IRTemp resHi
= newTemp(Ity_I64
);
4136 IRTemp resLo
= newTemp(Ity_I64
);
4137 IROp mulOp
= syned
? Iop_MullS64
: Iop_MullU64
;
4138 UInt tBaseOp
= syned
? AMD64G_CC_OP_SMULB
: AMD64G_CC_OP_UMULB
;
4139 setFlags_MUL ( Ity_I64
, t1
, tmp
, tBaseOp
);
4140 assign( res128
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
4141 assign( resHi
, unop(Iop_128HIto64
,mkexpr(res128
)));
4142 assign( resLo
, unop(Iop_128to64
,mkexpr(res128
)));
4143 putIReg64(R_RDX
, mkexpr(resHi
));
4144 putIReg64(R_RAX
, mkexpr(resLo
));
4148 IRTemp res64
= newTemp(Ity_I64
);
4149 IRTemp resHi
= newTemp(Ity_I32
);
4150 IRTemp resLo
= newTemp(Ity_I32
);
4151 IROp mulOp
= syned
? Iop_MullS32
: Iop_MullU32
;
4152 UInt tBaseOp
= syned
? AMD64G_CC_OP_SMULB
: AMD64G_CC_OP_UMULB
;
4153 setFlags_MUL ( Ity_I32
, t1
, tmp
, tBaseOp
);
4154 assign( res64
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
4155 assign( resHi
, unop(Iop_64HIto32
,mkexpr(res64
)));
4156 assign( resLo
, unop(Iop_64to32
,mkexpr(res64
)));
4157 putIRegRDX(4, mkexpr(resHi
));
4158 putIRegRAX(4, mkexpr(resLo
));
4162 IRTemp res32
= newTemp(Ity_I32
);
4163 IRTemp resHi
= newTemp(Ity_I16
);
4164 IRTemp resLo
= newTemp(Ity_I16
);
4165 IROp mulOp
= syned
? Iop_MullS16
: Iop_MullU16
;
4166 UInt tBaseOp
= syned
? AMD64G_CC_OP_SMULB
: AMD64G_CC_OP_UMULB
;
4167 setFlags_MUL ( Ity_I16
, t1
, tmp
, tBaseOp
);
4168 assign( res32
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
4169 assign( resHi
, unop(Iop_32HIto16
,mkexpr(res32
)));
4170 assign( resLo
, unop(Iop_32to16
,mkexpr(res32
)));
4171 putIRegRDX(2, mkexpr(resHi
));
4172 putIRegRAX(2, mkexpr(resLo
));
4176 IRTemp res16
= newTemp(Ity_I16
);
4177 IRTemp resHi
= newTemp(Ity_I8
);
4178 IRTemp resLo
= newTemp(Ity_I8
);
4179 IROp mulOp
= syned
? Iop_MullS8
: Iop_MullU8
;
4180 UInt tBaseOp
= syned
? AMD64G_CC_OP_SMULB
: AMD64G_CC_OP_UMULB
;
4181 setFlags_MUL ( Ity_I8
, t1
, tmp
, tBaseOp
);
4182 assign( res16
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
4183 assign( resHi
, unop(Iop_16HIto8
,mkexpr(res16
)));
4184 assign( resLo
, unop(Iop_16to8
,mkexpr(res16
)));
4185 putIRegRAX(2, mkexpr(res16
));
4190 vpanic("codegen_mulL_A_D(amd64)");
4192 DIP("%s%c %s\n", syned
? "imul" : "mul", nameISize(sz
), tmp_txt
);
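
/* Illustrative only (not compiled): a plain-C model of what codegen_mulL_A_D
   expresses in IR for the 32-bit case -- the full 64-bit product of EAX and
   the operand is computed, the high half goes to EDX and the low half to EAX.
   Names are invented for this sketch. */
#if 0
static void mulL_model_32 ( UInt eax, UInt src, Bool syned,
                            /*OUT*/UInt* edxOut, /*OUT*/UInt* eaxOut )
{
   ULong res = syned ? (ULong)((Long)(Int)eax * (Long)(Int)src)
                     : (ULong)eax * (ULong)src;
   *edxOut = (UInt)(res >> 32);   /* high half -> EDX */
   *eaxOut = (UInt)res;           /* low half  -> EAX */
}
#endif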
4196 /* Group 3 extended opcodes. We have to decide here whether F2 and F3
4199 ULong
dis_Grp3 ( const VexAbiInfo
* vbi
,
4200 Prefix pfx
, Int sz
, Long delta
, Bool
* decode_OK
)
4207 IRType ty
= szToITy(sz
);
4208 IRTemp t1
= newTemp(ty
);
4209 IRTemp dst1
, src
, dst0
;
4211 modrm
= getUChar(delta
);
4212 if (epartIsReg(modrm
)) {
4213 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4214 if (haveF2orF3(pfx
)) goto unhandled
;
4215 switch (gregLO3ofRM(modrm
)) {
4216 case 0: { /* TEST */
4218 d64
= getSDisp(imin(4,sz
), delta
);
4219 delta
+= imin(4,sz
);
4221 assign(dst1
, binop(mkSizedOp(ty
,Iop_And8
),
4222 getIRegE(sz
,pfx
,modrm
),
4223 mkU(ty
, d64
& mkSizeMask(sz
))));
4224 setFlags_DEP1( Iop_And8
, dst1
, ty
);
4225 DIP("test%c $%lld, %s\n",
4227 nameIRegE(sz
, pfx
, modrm
));
4235 putIRegE(sz
, pfx
, modrm
,
4236 unop(mkSizedOp(ty
,Iop_Not8
),
4237 getIRegE(sz
, pfx
, modrm
)));
4238 DIP("not%c %s\n", nameISize(sz
),
4239 nameIRegE(sz
, pfx
, modrm
));
4246 assign(dst0
, mkU(ty
,0));
4247 assign(src
, getIRegE(sz
, pfx
, modrm
));
4248 assign(dst1
, binop(mkSizedOp(ty
,Iop_Sub8
), mkexpr(dst0
),
4250 setFlags_DEP1_DEP2(Iop_Sub8
, dst0
, src
, ty
);
4251 putIRegE(sz
, pfx
, modrm
, mkexpr(dst1
));
4252 DIP("neg%c %s\n", nameISize(sz
), nameIRegE(sz
, pfx
, modrm
));
4254 case 4: /* MUL (unsigned widening) */
4257 assign(src
, getIRegE(sz
,pfx
,modrm
));
4258 codegen_mulL_A_D ( sz
, False
, src
,
4259 nameIRegE(sz
,pfx
,modrm
) );
4261 case 5: /* IMUL (signed widening) */
4264 assign(src
, getIRegE(sz
,pfx
,modrm
));
4265 codegen_mulL_A_D ( sz
, True
, src
,
4266 nameIRegE(sz
,pfx
,modrm
) );
4270 assign( t1
, getIRegE(sz
, pfx
, modrm
) );
4271 codegen_div ( sz
, t1
, False
);
4272 DIP("div%c %s\n", nameISize(sz
),
4273 nameIRegE(sz
, pfx
, modrm
));
4277 assign( t1
, getIRegE(sz
, pfx
, modrm
) );
4278 codegen_div ( sz
, t1
, True
);
4279 DIP("idiv%c %s\n", nameISize(sz
),
4280 nameIRegE(sz
, pfx
, modrm
));
4284 vpanic("Grp3(amd64,R)");
4287 /* Decide if F2/XACQ or F3/XREL might be valid. */
4288 Bool validF2orF3
= haveF2orF3(pfx
) ? False
: True
;
4289 if ((gregLO3ofRM(modrm
) == 3/*NEG*/ || gregLO3ofRM(modrm
) == 2/*NOT*/)
4290 && haveF2orF3(pfx
) && !haveF2andF3(pfx
) && haveLOCK(pfx
)) {
4293 if (!validF2orF3
) goto unhandled
;
4295 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
,
4296 /* we have to inform disAMode of any immediate
4298 gregLO3ofRM(modrm
)==0/*TEST*/
4304 assign(t1
, loadLE(ty
,mkexpr(addr
)));
4305 switch (gregLO3ofRM(modrm
)) {
4306 case 0: { /* TEST */
4307 d64
= getSDisp(imin(4,sz
), delta
);
4308 delta
+= imin(4,sz
);
4310 assign(dst1
, binop(mkSizedOp(ty
,Iop_And8
),
4312 mkU(ty
, d64
& mkSizeMask(sz
))));
4313 setFlags_DEP1( Iop_And8
, dst1
, ty
);
4314 DIP("test%c $%lld, %s\n", nameISize(sz
), d64
, dis_buf
);
4322 assign(dst1
, unop(mkSizedOp(ty
,Iop_Not8
), mkexpr(t1
)));
4323 if (haveLOCK(pfx
)) {
4324 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(dst1
)/*new*/,
4325 guest_RIP_curr_instr
);
4327 storeLE( mkexpr(addr
), mkexpr(dst1
) );
4329 DIP("not%c %s\n", nameISize(sz
), dis_buf
);
4335 assign(dst0
, mkU(ty
,0));
4336 assign(src
, mkexpr(t1
));
4337 assign(dst1
, binop(mkSizedOp(ty
,Iop_Sub8
), mkexpr(dst0
),
4339 if (haveLOCK(pfx
)) {
4340 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(dst1
)/*new*/,
4341 guest_RIP_curr_instr
);
4343 storeLE( mkexpr(addr
), mkexpr(dst1
) );
4345 setFlags_DEP1_DEP2(Iop_Sub8
, dst0
, src
, ty
);
4346 DIP("neg%c %s\n", nameISize(sz
), dis_buf
);
4348 case 4: /* MUL (unsigned widening) */
4349 codegen_mulL_A_D ( sz
, False
, t1
, dis_buf
);
4352 codegen_mulL_A_D ( sz
, True
, t1
, dis_buf
);
4355 codegen_div ( sz
, t1
, False
);
4356 DIP("div%c %s\n", nameISize(sz
), dis_buf
);
4359 codegen_div ( sz
, t1
, True
);
4360 DIP("idiv%c %s\n", nameISize(sz
), dis_buf
);
4364 vpanic("Grp3(amd64,M)");
4374 /* Group 4 extended opcodes. We have to decide here whether F2 and F3
4377 ULong
dis_Grp4 ( const VexAbiInfo
* vbi
,
4378 Prefix pfx
, Long delta
, Bool
* decode_OK
)
4384 IRTemp t1
= newTemp(ty
);
4385 IRTemp t2
= newTemp(ty
);
4389 modrm
= getUChar(delta
);
4390 if (epartIsReg(modrm
)) {
4391 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4392 if (haveF2orF3(pfx
)) goto unhandled
;
4393 assign(t1
, getIRegE(1, pfx
, modrm
));
4394 switch (gregLO3ofRM(modrm
)) {
4396 assign(t2
, binop(Iop_Add8
, mkexpr(t1
), mkU8(1)));
4397 putIRegE(1, pfx
, modrm
, mkexpr(t2
));
4398 setFlags_INC_DEC( True
, t2
, ty
);
4401 assign(t2
, binop(Iop_Sub8
, mkexpr(t1
), mkU8(1)));
4402 putIRegE(1, pfx
, modrm
, mkexpr(t2
));
4403 setFlags_INC_DEC( False
, t2
, ty
);
4410 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm
)),
4411 nameIRegE(1, pfx
, modrm
));
4413 /* Decide if F2/XACQ or F3/XREL might be valid. */
4414 Bool validF2orF3
= haveF2orF3(pfx
) ? False
: True
;
4415 if ((gregLO3ofRM(modrm
) == 0/*INC*/ || gregLO3ofRM(modrm
) == 1/*DEC*/)
4416 && haveF2orF3(pfx
) && !haveF2andF3(pfx
) && haveLOCK(pfx
)) {
4419 if (!validF2orF3
) goto unhandled
;
4421 IRTemp addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
4422 assign( t1
, loadLE(ty
, mkexpr(addr
)) );
4423 switch (gregLO3ofRM(modrm
)) {
4425 assign(t2
, binop(Iop_Add8
, mkexpr(t1
), mkU8(1)));
4426 if (haveLOCK(pfx
)) {
4427 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(t2
)/*new*/,
4428 guest_RIP_curr_instr
);
4430 storeLE( mkexpr(addr
), mkexpr(t2
) );
4432 setFlags_INC_DEC( True
, t2
, ty
);
4435 assign(t2
, binop(Iop_Sub8
, mkexpr(t1
), mkU8(1)));
4436 if (haveLOCK(pfx
)) {
4437 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(t2
)/*new*/,
4438 guest_RIP_curr_instr
);
4440 storeLE( mkexpr(addr
), mkexpr(t2
) );
4442 setFlags_INC_DEC( False
, t2
, ty
);
4449 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm
)), dis_buf
);
4458 /* Group 5 extended opcodes. We have to decide here whether F2 and F3
4461 ULong
dis_Grp5 ( const VexAbiInfo
* vbi
,
4462 Prefix pfx
, Int sz
, Long delta
,
4463 /*MOD*/DisResult
* dres
, /*OUT*/Bool
* decode_OK
)
4468 IRTemp addr
= IRTemp_INVALID
;
4469 IRType ty
= szToITy(sz
);
4470 IRTemp t1
= newTemp(ty
);
4471 IRTemp t2
= IRTemp_INVALID
;
4472 IRTemp t3
= IRTemp_INVALID
;
4477 modrm
= getUChar(delta
);
4478 if (epartIsReg(modrm
)) {
4479 /* F2/XACQ and F3/XREL are always invalid in the non-mem case.
4480 F2/CALL and F2/JMP may have bnd prefix. */
4483 && (gregLO3ofRM(modrm
) == 2 || gregLO3ofRM(modrm
) == 4)))
4485 assign(t1
, getIRegE(sz
,pfx
,modrm
));
4486 switch (gregLO3ofRM(modrm
)) {
4489 assign(t2
, binop(mkSizedOp(ty
,Iop_Add8
),
4490 mkexpr(t1
), mkU(ty
,1)));
4491 setFlags_INC_DEC( True
, t2
, ty
);
4492 putIRegE(sz
,pfx
,modrm
, mkexpr(t2
));
4496 assign(t2
, binop(mkSizedOp(ty
,Iop_Sub8
),
4497 mkexpr(t1
), mkU(ty
,1)));
4498 setFlags_INC_DEC( False
, t2
, ty
);
4499 putIRegE(sz
,pfx
,modrm
, mkexpr(t2
));
4501 case 2: /* call Ev */
4502 /* Ignore any sz value and operate as if sz==8. */
4503 if (!(sz
== 4 || sz
== 8)) goto unhandledR
;
4504 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
4506 t3
= newTemp(Ity_I64
);
4507 assign(t3
, getIRegE(sz
,pfx
,modrm
));
4508 t2
= newTemp(Ity_I64
);
4509 assign(t2
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(8)));
4510 putIReg64(R_RSP
, mkexpr(t2
));
4511 storeLE( mkexpr(t2
), mkU64(guest_RIP_bbstart
+delta
+1));
4512 make_redzone_AbiHint(vbi
, t2
, t3
/*nia*/, "call-Ev(reg)");
4513 jmp_treg(dres
, Ijk_Call
, t3
);
4514 vassert(dres
->whatNext
== Dis_StopHere
);
4517 case 4: /* jmp Ev */
4518 /* Ignore any sz value and operate as if sz==8. */
4519 if (!(sz
== 4 || sz
== 8)) goto unhandledR
;
4520 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
4522 t3
= newTemp(Ity_I64
);
4523 assign(t3
, getIRegE(sz
,pfx
,modrm
));
4524 jmp_treg(dres
, Ijk_Boring
, t3
);
4525 vassert(dres
->whatNext
== Dis_StopHere
);
4528 case 6: /* PUSH Ev */
4529 /* There is no encoding for 32-bit operand size; hence ... */
4530 if (sz
== 4) sz
= 8;
4531 if (sz
== 8 || sz
== 2) {
4532 ty
= szToITy(sz
); /* redo it, since sz might have changed */
4534 assign(t3
, getIRegE(sz
,pfx
,modrm
));
4535 t2
= newTemp(Ity_I64
);
4536 assign( t2
, binop(Iop_Sub64
,getIReg64(R_RSP
),mkU64(sz
)) );
4537 putIReg64(R_RSP
, mkexpr(t2
) );
4538 storeLE( mkexpr(t2
), mkexpr(t3
) );
4541 goto unhandledR
; /* awaiting test case */
4549 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm
)),
4550 showSz
? nameISize(sz
) : ' ',
4551 nameIRegE(sz
, pfx
, modrm
));
4553 /* Decide if F2/XACQ, F3/XREL, F2/CALL or F2/JMP might be valid. */
4554 Bool validF2orF3
= haveF2orF3(pfx
) ? False
: True
;
4555 if ((gregLO3ofRM(modrm
) == 0/*INC*/ || gregLO3ofRM(modrm
) == 1/*DEC*/)
4556 && haveF2orF3(pfx
) && !haveF2andF3(pfx
) && haveLOCK(pfx
)) {
4558 } else if ((gregLO3ofRM(modrm
) == 2 || gregLO3ofRM(modrm
) == 4)
4559 && (haveF2(pfx
) && !haveF3(pfx
))) {
4562 if (!validF2orF3
) goto unhandledM
;
4564 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
4565 if (gregLO3ofRM(modrm
) != 2 && gregLO3ofRM(modrm
) != 4
4566 && gregLO3ofRM(modrm
) != 6) {
4567 assign(t1
, loadLE(ty
,mkexpr(addr
)));
4569 switch (gregLO3ofRM(modrm
)) {
4572 assign(t2
, binop(mkSizedOp(ty
,Iop_Add8
),
4573 mkexpr(t1
), mkU(ty
,1)));
4574 if (haveLOCK(pfx
)) {
4575 casLE( mkexpr(addr
),
4576 mkexpr(t1
), mkexpr(t2
), guest_RIP_curr_instr
);
4578 storeLE(mkexpr(addr
),mkexpr(t2
));
4580 setFlags_INC_DEC( True
, t2
, ty
);
4584 assign(t2
, binop(mkSizedOp(ty
,Iop_Sub8
),
4585 mkexpr(t1
), mkU(ty
,1)));
4586 if (haveLOCK(pfx
)) {
4587 casLE( mkexpr(addr
),
4588 mkexpr(t1
), mkexpr(t2
), guest_RIP_curr_instr
);
4590 storeLE(mkexpr(addr
),mkexpr(t2
));
4592 setFlags_INC_DEC( False
, t2
, ty
);
4594 case 2: /* call Ev */
4595 /* Ignore any sz value and operate as if sz==8. */
4596 if (!(sz
== 4 || sz
== 8)) goto unhandledM
;
4597 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
4599 t3
= newTemp(Ity_I64
);
4600 assign(t3
, loadLE(Ity_I64
,mkexpr(addr
)));
4601 t2
= newTemp(Ity_I64
);
4602 assign(t2
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(8)));
4603 putIReg64(R_RSP
, mkexpr(t2
));
4604 storeLE( mkexpr(t2
), mkU64(guest_RIP_bbstart
+delta
+len
));
4605 make_redzone_AbiHint(vbi
, t2
, t3
/*nia*/, "call-Ev(mem)");
4606 jmp_treg(dres
, Ijk_Call
, t3
);
4607 vassert(dres
->whatNext
== Dis_StopHere
);
4610 case 4: /* JMP Ev */
4611 /* Ignore any sz value and operate as if sz==8. */
4612 if (!(sz
== 4 || sz
== 8)) goto unhandledM
;
4613 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
4615 t3
= newTemp(Ity_I64
);
4616 assign(t3
, loadLE(Ity_I64
,mkexpr(addr
)));
4617 jmp_treg(dres
, Ijk_Boring
, t3
);
4618 vassert(dres
->whatNext
== Dis_StopHere
);
4621 case 6: /* PUSH Ev */
4622 /* There is no encoding for 32-bit operand size; hence ... */
4623 if (sz
== 4) sz
= 8;
4624 if (sz
== 8 || sz
== 2) {
4625 ty
= szToITy(sz
); /* redo it, since sz might have changed */
4627 assign(t3
, loadLE(ty
,mkexpr(addr
)));
4628 t2
= newTemp(Ity_I64
);
4629 assign( t2
, binop(Iop_Sub64
,getIReg64(R_RSP
),mkU64(sz
)) );
4630 putIReg64(R_RSP
, mkexpr(t2
) );
4631 storeLE( mkexpr(t2
), mkexpr(t3
) );
4634 goto unhandledM
; /* awaiting test case */
4642 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm
)),
4643 showSz
? nameISize(sz
) : ' ',
4650 /*------------------------------------------------------------*/
4651 /*--- Disassembling string ops (including REP prefixes) ---*/
4652 /*------------------------------------------------------------*/
4654 /* Code shared by all the string ops */
4656 void dis_string_op_increment ( Int sz
, IRTemp t_inc
)
   if (sz == 8 || sz == 4 || sz == 2) {
      logSz = 1;
      if (sz == 4) logSz = 2;
      if (sz == 8) logSz = 3;
      assign( t_inc,
              binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
                               mkU8(logSz)) );
   } else {
      assign( t_inc,
              IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
   }
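
/* Illustrative only (not compiled): the value dis_string_op_increment leaves
   in t_inc, as plain C.  This assumes (as the IR above does) that DFLAG is
   kept in guest state as +1 or -1, so scaling it by the element size yields
   the per-element step for RSI/RDI.  Names invented for this sketch. */
#if 0
static Long string_increment_model ( Int sz /* 1, 2, 4 or 8 */,
                                     Long dflag /* +1 or -1 in guest state */ )
{
   /* The IR shifts DFLAG left by log2(sz); multiplying gives the same
      value and avoids left-shifting a negative quantity in C. */
   return dflag * (Long)sz;   /* +sz when DF=0, -sz when DF=1 */
}
#endif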
4673 void dis_string_op( void (*dis_OP
)( Int
, IRTemp
, Prefix pfx
),
4674 Int sz
, const HChar
* name
, Prefix pfx
)
4676 IRTemp t_inc
= newTemp(Ity_I64
);
   /* Really we ought to inspect the override prefixes, but we don't.
      The following assertion catches any resulting silliness. */
4679 vassert(pfx
== clearSegBits(pfx
));
4680 dis_string_op_increment(sz
, t_inc
);
4681 dis_OP( sz
, t_inc
, pfx
);
4682 DIP("%s%c\n", name
, nameISize(sz
));
4686 void dis_MOVS ( Int sz
, IRTemp t_inc
, Prefix pfx
)
4688 IRType ty
= szToITy(sz
);
4689 IRTemp td
= newTemp(Ity_I64
); /* RDI */
4690 IRTemp ts
= newTemp(Ity_I64
); /* RSI */
4691 IRExpr
*incd
, *incs
;
4694 assign( td
, unop(Iop_32Uto64
, getIReg32(R_RDI
)) );
4695 assign( ts
, unop(Iop_32Uto64
, getIReg32(R_RSI
)) );
4697 assign( td
, getIReg64(R_RDI
) );
4698 assign( ts
, getIReg64(R_RSI
) );
4701 storeLE( mkexpr(td
), loadLE(ty
,mkexpr(ts
)) );
4703 incd
= binop(Iop_Add64
, mkexpr(td
), mkexpr(t_inc
));
4704 incs
= binop(Iop_Add64
, mkexpr(ts
), mkexpr(t_inc
));
4706 incd
= unop(Iop_32Uto64
, unop(Iop_64to32
, incd
));
4707 incs
= unop(Iop_32Uto64
, unop(Iop_64to32
, incs
));
4709 putIReg64( R_RDI
, incd
);
4710 putIReg64( R_RSI
, incs
);
4714 void dis_LODS ( Int sz
, IRTemp t_inc
, Prefix pfx
)
4716 IRType ty
= szToITy(sz
);
4717 IRTemp ts
= newTemp(Ity_I64
); /* RSI */
4721 assign( ts
, unop(Iop_32Uto64
, getIReg32(R_RSI
)) );
4723 assign( ts
, getIReg64(R_RSI
) );
4725 putIRegRAX ( sz
, loadLE(ty
, mkexpr(ts
)) );
4727 incs
= binop(Iop_Add64
, mkexpr(ts
), mkexpr(t_inc
));
4729 incs
= unop(Iop_32Uto64
, unop(Iop_64to32
, incs
));
4730 putIReg64( R_RSI
, incs
);
4734 void dis_STOS ( Int sz
, IRTemp t_inc
, Prefix pfx
)
4736 IRType ty
= szToITy(sz
);
4737 IRTemp ta
= newTemp(ty
); /* rAX */
4738 IRTemp td
= newTemp(Ity_I64
); /* RDI */
4741 assign( ta
, getIRegRAX(sz
) );
4744 assign( td
, unop(Iop_32Uto64
, getIReg32(R_RDI
)) );
4746 assign( td
, getIReg64(R_RDI
) );
4748 storeLE( mkexpr(td
), mkexpr(ta
) );
4750 incd
= binop(Iop_Add64
, mkexpr(td
), mkexpr(t_inc
));
4752 incd
= unop(Iop_32Uto64
, unop(Iop_64to32
, incd
));
4753 putIReg64( R_RDI
, incd
);
4757 void dis_CMPS ( Int sz
, IRTemp t_inc
, Prefix pfx
)
4759 IRType ty
= szToITy(sz
);
4760 IRTemp tdv
= newTemp(ty
); /* (RDI) */
4761 IRTemp tsv
= newTemp(ty
); /* (RSI) */
4762 IRTemp td
= newTemp(Ity_I64
); /* RDI */
4763 IRTemp ts
= newTemp(Ity_I64
); /* RSI */
4764 IRExpr
*incd
, *incs
;
4767 assign( td
, unop(Iop_32Uto64
, getIReg32(R_RDI
)) );
4768 assign( ts
, unop(Iop_32Uto64
, getIReg32(R_RSI
)) );
4770 assign( td
, getIReg64(R_RDI
) );
4771 assign( ts
, getIReg64(R_RSI
) );
4774 assign( tdv
, loadLE(ty
,mkexpr(td
)) );
4776 assign( tsv
, loadLE(ty
,mkexpr(ts
)) );
4778 setFlags_DEP1_DEP2 ( Iop_Sub8
, tsv
, tdv
, ty
);
4780 incd
= binop(Iop_Add64
, mkexpr(td
), mkexpr(t_inc
));
4781 incs
= binop(Iop_Add64
, mkexpr(ts
), mkexpr(t_inc
));
4783 incd
= unop(Iop_32Uto64
, unop(Iop_64to32
, incd
));
4784 incs
= unop(Iop_32Uto64
, unop(Iop_64to32
, incs
));
4786 putIReg64( R_RDI
, incd
);
4787 putIReg64( R_RSI
, incs
);
4791 void dis_SCAS ( Int sz
, IRTemp t_inc
, Prefix pfx
)
4793 IRType ty
= szToITy(sz
);
4794 IRTemp ta
= newTemp(ty
); /* rAX */
4795 IRTemp td
= newTemp(Ity_I64
); /* RDI */
4796 IRTemp tdv
= newTemp(ty
); /* (RDI) */
4799 assign( ta
, getIRegRAX(sz
) );
4802 assign( td
, unop(Iop_32Uto64
, getIReg32(R_RDI
)) );
4804 assign( td
, getIReg64(R_RDI
) );
4806 assign( tdv
, loadLE(ty
,mkexpr(td
)) );
4808 setFlags_DEP1_DEP2 ( Iop_Sub8
, ta
, tdv
, ty
);
4810 incd
= binop(Iop_Add64
, mkexpr(td
), mkexpr(t_inc
));
4812 incd
= unop(Iop_32Uto64
, unop(Iop_64to32
, incd
));
4813 putIReg64( R_RDI
, incd
);
4817 /* Wrap the appropriate string op inside a REP/REPE/REPNE. We assume
4818 the insn is the last one in the basic block, and so emit a jump to
4819 the next insn, rather than just falling through. */
4821 void dis_REP_op ( /*MOD*/DisResult
* dres
,
4823 void (*dis_OP
)(Int
, IRTemp
, Prefix
),
4824 Int sz
, Addr64 rip
, Addr64 rip_next
, const HChar
* name
,
4827 IRTemp t_inc
= newTemp(Ity_I64
);
   /* Really we ought to inspect the override prefixes, but we don't.
      The following assertion catches any resulting silliness. */
4833 vassert(pfx
== clearSegBits(pfx
));
4836 tc
= newTemp(Ity_I32
); /* ECX */
4837 assign( tc
, getIReg32(R_RCX
) );
4838 cmp
= binop(Iop_CmpEQ32
, mkexpr(tc
), mkU32(0));
4840 tc
= newTemp(Ity_I64
); /* RCX */
4841 assign( tc
, getIReg64(R_RCX
) );
4842 cmp
= binop(Iop_CmpEQ64
, mkexpr(tc
), mkU64(0));
4845 stmt( IRStmt_Exit( cmp
, Ijk_Boring
,
4846 IRConst_U64(rip_next
), OFFB_RIP
) );
4849 putIReg32(R_RCX
, binop(Iop_Sub32
, mkexpr(tc
), mkU32(1)) );
4851 putIReg64(R_RCX
, binop(Iop_Sub64
, mkexpr(tc
), mkU64(1)) );
4853 dis_string_op_increment(sz
, t_inc
);
4854 dis_OP (sz
, t_inc
, pfx
);
4856 if (cond
== AMD64CondAlways
) {
4857 jmp_lit(dres
, Ijk_Boring
, rip
);
4858 vassert(dres
->whatNext
== Dis_StopHere
);
4860 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond
),
4864 jmp_lit(dres
, Ijk_Boring
, rip_next
);
4865 vassert(dres
->whatNext
== Dis_StopHere
);
   DIP("%s%c\n", name, nameISize(sz));
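
/* Illustrative only (not compiled): the control structure dis_REP_op builds,
   written as plain C over a small guest-state model.  One string-op iteration
   is done per execution of the translated instruction; looping back to 'rip'
   makes the dispatcher re-enter until RCX reaches zero (or, for REPE/REPNE,
   until the condition fails).  Names invented for this sketch. */
#if 0
static void rep_model ( ULong* rcx, ULong rip, ULong rip_next,
                        Bool (*cond_holds)(void), void (*one_op)(void),
                        /*OUT*/ULong* next_rip )
{
   if (*rcx == 0) { *next_rip = rip_next; return; }  /* count exhausted: exit */
   *rcx -= 1;
   one_op();                                         /* one element's worth of work */
   if (cond_holds())
      *next_rip = rip;        /* loop: re-execute this instruction */
   else
      *next_rip = rip_next;   /* REPE/REPNE condition failed: fall through */
}
#endif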
4871 /*------------------------------------------------------------*/
4872 /*--- Arithmetic, etc. ---*/
4873 /*------------------------------------------------------------*/
/* IMUL E, G.  Supplied rip points to the modR/M byte. */
4877 ULong
dis_mul_E_G ( const VexAbiInfo
* vbi
,
4884 UChar rm
= getUChar(delta0
);
4885 IRType ty
= szToITy(size
);
4886 IRTemp te
= newTemp(ty
);
4887 IRTemp tg
= newTemp(ty
);
4888 IRTemp resLo
= newTemp(ty
);
4890 assign( tg
, getIRegG(size
, pfx
, rm
) );
4891 if (epartIsReg(rm
)) {
4892 assign( te
, getIRegE(size
, pfx
, rm
) );
4894 IRTemp addr
= disAMode( &alen
, vbi
, pfx
, delta0
, dis_buf
, 0 );
4895 assign( te
, loadLE(ty
,mkexpr(addr
)) );
4898 setFlags_MUL ( ty
, te
, tg
, AMD64G_CC_OP_SMULB
);
4900 assign( resLo
, binop( mkSizedOp(ty
, Iop_Mul8
), mkexpr(te
), mkexpr(tg
) ) );
4902 putIRegG(size
, pfx
, rm
, mkexpr(resLo
) );
4904 if (epartIsReg(rm
)) {
4905 DIP("imul%c %s, %s\n", nameISize(size
),
4906 nameIRegE(size
,pfx
,rm
),
4907 nameIRegG(size
,pfx
,rm
));
4910 DIP("imul%c %s, %s\n", nameISize(size
),
4912 nameIRegG(size
,pfx
,rm
));
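
/* Illustrative only (not compiled): unlike the widening forms handled by
   codegen_mulL_A_D, IMUL E,G keeps only the low 'size' bits of the product in
   the destination register; the SMUL flags thunk set up above later derives
   OF/CF from whether the full signed product fits in that width (per x86
   semantics).  A sketch of the 32-bit case, with invented names: */
#if 0
static UInt imul_EG_model_32 ( UInt g, UInt e, /*OUT*/Bool* ofCf )
{
   Long full = (Long)(Int)g * (Long)(Int)e;     /* exact signed product */
   UInt lo   = (UInt)full;                      /* truncated result -> G */
   *ofCf     = toBool(full != (Long)(Int)lo);   /* OF=CF=1 if it didn't fit */
   return lo;
}
#endif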
4918 /* IMUL I * E -> G. Supplied rip points to the modR/M byte. */
4920 ULong
dis_imul_I_E_G ( const VexAbiInfo
* vbi
,
4929 UChar rm
= getUChar(delta
);
4930 IRType ty
= szToITy(size
);
4931 IRTemp te
= newTemp(ty
);
4932 IRTemp tl
= newTemp(ty
);
4933 IRTemp resLo
= newTemp(ty
);
4935 vassert(/*size == 1 ||*/ size
== 2 || size
== 4 || size
== 8);
4937 if (epartIsReg(rm
)) {
4938 assign(te
, getIRegE(size
, pfx
, rm
));
4941 IRTemp addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
4943 assign(te
, loadLE(ty
, mkexpr(addr
)));
4946 d64
= getSDisp(imin(4,litsize
),delta
);
4947 delta
+= imin(4,litsize
);
4949 d64
&= mkSizeMask(size
);
4950 assign(tl
, mkU(ty
,d64
));
4952 assign( resLo
, binop( mkSizedOp(ty
, Iop_Mul8
), mkexpr(te
), mkexpr(tl
) ));
4954 setFlags_MUL ( ty
, te
, tl
, AMD64G_CC_OP_SMULB
);
4956 putIRegG(size
, pfx
, rm
, mkexpr(resLo
));
4958 DIP("imul%c $%lld, %s, %s\n",
4959 nameISize(size
), d64
,
4960 ( epartIsReg(rm
) ? nameIRegE(size
,pfx
,rm
) : dis_buf
),
4961 nameIRegG(size
,pfx
,rm
) );
4966 /* Generate an IR sequence to do a popcount operation on the supplied
4967 IRTemp, and return a new IRTemp holding the result. 'ty' may be
4968 Ity_I16, Ity_I32 or Ity_I64 only. */
4969 static IRTemp
gen_POPCOUNT ( IRType ty
, IRTemp src
)
4972 if (ty
== Ity_I16
) {
4973 IRTemp old
= IRTemp_INVALID
;
4974 IRTemp nyu
= IRTemp_INVALID
;
4975 IRTemp mask
[4], shift
[4];
4976 for (i
= 0; i
< 4; i
++) {
4977 mask
[i
] = newTemp(ty
);
4980 assign(mask
[0], mkU16(0x5555));
4981 assign(mask
[1], mkU16(0x3333));
4982 assign(mask
[2], mkU16(0x0F0F));
4983 assign(mask
[3], mkU16(0x00FF));
4985 for (i
= 0; i
< 4; i
++) {
4993 binop(Iop_Shr16
, mkexpr(old
), mkU8(shift
[i
])),
4999 if (ty
== Ity_I32
) {
5000 IRTemp old
= IRTemp_INVALID
;
5001 IRTemp nyu
= IRTemp_INVALID
;
5002 IRTemp mask
[5], shift
[5];
5003 for (i
= 0; i
< 5; i
++) {
5004 mask
[i
] = newTemp(ty
);
5007 assign(mask
[0], mkU32(0x55555555));
5008 assign(mask
[1], mkU32(0x33333333));
5009 assign(mask
[2], mkU32(0x0F0F0F0F));
5010 assign(mask
[3], mkU32(0x00FF00FF));
5011 assign(mask
[4], mkU32(0x0000FFFF));
5013 for (i
= 0; i
< 5; i
++) {
5021 binop(Iop_Shr32
, mkexpr(old
), mkU8(shift
[i
])),
5027 if (ty
== Ity_I64
) {
5028 IRTemp old
= IRTemp_INVALID
;
5029 IRTemp nyu
= IRTemp_INVALID
;
5030 IRTemp mask
[6], shift
[6];
5031 for (i
= 0; i
< 6; i
++) {
5032 mask
[i
] = newTemp(ty
);
5035 assign(mask
[0], mkU64(0x5555555555555555ULL
));
5036 assign(mask
[1], mkU64(0x3333333333333333ULL
));
5037 assign(mask
[2], mkU64(0x0F0F0F0F0F0F0F0FULL
));
5038 assign(mask
[3], mkU64(0x00FF00FF00FF00FFULL
));
5039 assign(mask
[4], mkU64(0x0000FFFF0000FFFFULL
));
5040 assign(mask
[5], mkU64(0x00000000FFFFFFFFULL
));
5042 for (i
= 0; i
< 6; i
++) {
5050 binop(Iop_Shr64
, mkexpr(old
), mkU8(shift
[i
])),
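
/* Illustrative only (not compiled): the same mask-and-shift popcount that
   gen_POPCOUNT expresses in IR, written directly in C for the 64-bit case.
   After step i, the word is viewed as fields of width 2^(i+1), each holding
   the bit count of that field. */
#if 0
static ULong popcount64_model ( ULong x )
{
   x = (x & 0x5555555555555555ULL) + ((x >> 1)  & 0x5555555555555555ULL);
   x = (x & 0x3333333333333333ULL) + ((x >> 2)  & 0x3333333333333333ULL);
   x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x >> 4)  & 0x0F0F0F0F0F0F0F0FULL);
   x = (x & 0x00FF00FF00FF00FFULL) + ((x >> 8)  & 0x00FF00FF00FF00FFULL);
   x = (x & 0x0000FFFF0000FFFFULL) + ((x >> 16) & 0x0000FFFF0000FFFFULL);
   x = (x & 0x00000000FFFFFFFFULL) + ((x >> 32) & 0x00000000FFFFFFFFULL);
   return x;
}
#endif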
5061 /* Generate an IR sequence to do a count-leading-zeroes operation on
5062 the supplied IRTemp, and return a new IRTemp holding the result.
5063 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
5064 the argument is zero, return the number of bits in the word (the
5065 natural semantics). */
5066 static IRTemp
gen_LZCNT ( IRType ty
, IRTemp src
)
5068 vassert(ty
== Ity_I64
|| ty
== Ity_I32
|| ty
== Ity_I16
);
5070 IRTemp src64
= newTemp(Ity_I64
);
5071 assign(src64
, widenUto64( mkexpr(src
) ));
5073 IRTemp src64x
= newTemp(Ity_I64
);
5075 binop(Iop_Shl64
, mkexpr(src64
),
5076 mkU8(64 - 8 * sizeofIRType(ty
))));
5078 // Clz64 has undefined semantics when its input is zero, so
5079 // special-case around that.
5080 IRTemp res64
= newTemp(Ity_I64
);
5083 binop(Iop_CmpEQ64
, mkexpr(src64x
), mkU64(0)),
5084 mkU64(8 * sizeofIRType(ty
)),
5085 unop(Iop_Clz64
, mkexpr(src64x
))
5088 IRTemp res
= newTemp(ty
);
5089 assign(res
, narrowTo(ty
, mkexpr(res64
)));
5094 /* Generate an IR sequence to do a count-trailing-zeroes operation on
5095 the supplied IRTemp, and return a new IRTemp holding the result.
5096 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
5097 the argument is zero, return the number of bits in the word (the
5098 natural semantics). */
5099 static IRTemp
gen_TZCNT ( IRType ty
, IRTemp src
)
5101 vassert(ty
== Ity_I64
|| ty
== Ity_I32
|| ty
== Ity_I16
);
5103 IRTemp src64
= newTemp(Ity_I64
);
5104 assign(src64
, widenUto64( mkexpr(src
) ));
5106 // Ctz64 has undefined semantics when its input is zero, so
5107 // special-case around that.
5108 IRTemp res64
= newTemp(Ity_I64
);
5111 binop(Iop_CmpEQ64
, mkexpr(src64
), mkU64(0)),
5112 mkU64(8 * sizeofIRType(ty
)),
5113 unop(Iop_Ctz64
, mkexpr(src64
))
   IRTemp res = newTemp(ty);
   assign(res, narrowTo(ty, mkexpr(res64)));
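
/* Illustrative only (not compiled): the zero-input handling in gen_LZCNT and
   gen_TZCNT, as plain C.  Clz64/Ctz64 are undefined for a zero input, so the
   generated IR selects the operand width instead; gen_LZCNT additionally
   shifts the value to the top of a 64-bit word so that a 64-bit count-leading-
   zeroes gives the answer for the original width.  Names invented; the GCC
   builtins stand in for Iop_Clz64/Iop_Ctz64. */
#if 0
static UInt lzcnt_model ( ULong src, UInt widthInBits /* 16, 32 or 64 */ )
{
   if (src == 0) return widthInBits;            /* LZCNT of 0 == width */
   ULong shifted = src << (64 - widthInBits);   /* park the value at the top */
   return (UInt)__builtin_clzll(shifted);
}

static UInt tzcnt_model ( ULong src, UInt widthInBits /* 16, 32 or 64 */ )
{
   if (src == 0) return widthInBits;            /* TZCNT of 0 == width */
   return (UInt)__builtin_ctzll(src);           /* unused high bits don't affect
                                                   the trailing-zero count */
}
#endif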
5122 /*------------------------------------------------------------*/
5124 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/
5126 /*------------------------------------------------------------*/
5128 /* --- Helper functions for dealing with the register stack. --- */
5130 /* --- Set the emulation-warning pseudo-register. --- */
5132 static void put_emwarn ( IRExpr
* e
/* :: Ity_I32 */ )
5134 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I32
);
5135 stmt( IRStmt_Put( OFFB_EMNOTE
, e
) );
5138 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
5140 static IRExpr
* mkQNaN64 ( void )
5142 /* QNaN is 0 2047 1 0(51times)
5143 == 0b 11111111111b 1 0(51times)
5144 == 0x7FF8 0000 0000 0000
5146 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL
));
5149 /* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */
5151 static IRExpr
* get_ftop ( void )
5153 return IRExpr_Get( OFFB_FTOP
, Ity_I32
);
5156 static void put_ftop ( IRExpr
* e
)
5158 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I32
);
5159 stmt( IRStmt_Put( OFFB_FTOP
, e
) );
5162 /* --------- Get/put the C3210 bits. --------- */
5164 static IRExpr
* /* :: Ity_I64 */ get_C3210 ( void )
5166 return IRExpr_Get( OFFB_FC3210
, Ity_I64
);
5169 static void put_C3210 ( IRExpr
* e
/* :: Ity_I64 */ )
5171 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I64
);
5172 stmt( IRStmt_Put( OFFB_FC3210
, e
) );
5175 /* --------- Get/put the FPU rounding mode. --------- */
5176 static IRExpr
* /* :: Ity_I32 */ get_fpround ( void )
5178 return unop(Iop_64to32
, IRExpr_Get( OFFB_FPROUND
, Ity_I64
));
5181 static void put_fpround ( IRExpr
* /* :: Ity_I32 */ e
)
5183 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I32
);
5184 stmt( IRStmt_Put( OFFB_FPROUND
, unop(Iop_32Uto64
,e
) ) );
5188 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
5189 /* Produces a value in 0 .. 3, which is encoded as per the type
5190 IRRoundingMode. Since the guest_FPROUND value is also encoded as
5191 per IRRoundingMode, we merely need to get it and mask it for
5194 static IRExpr
* /* :: Ity_I32 */ get_roundingmode ( void )
5196 return binop( Iop_And32
, get_fpround(), mkU32(3) );
5199 static IRExpr
* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
5201 return mkU32(Irrm_NEAREST
);
5205 /* --------- Get/set FP register tag bytes. --------- */
5207 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
5209 static void put_ST_TAG ( Int i
, IRExpr
* value
)
5212 vassert(typeOfIRExpr(irsb
->tyenv
, value
) == Ity_I8
);
5213 descr
= mkIRRegArray( OFFB_FPTAGS
, Ity_I8
, 8 );
5214 stmt( IRStmt_PutI( mkIRPutI(descr
, get_ftop(), i
, value
) ) );
5217 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
5218 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
5220 static IRExpr
* get_ST_TAG ( Int i
)
5222 IRRegArray
* descr
= mkIRRegArray( OFFB_FPTAGS
, Ity_I8
, 8 );
5223 return IRExpr_GetI( descr
, get_ftop(), i
);
5227 /* --------- Get/set FP registers. --------- */
5229 /* Given i, and some expression e, emit 'ST(i) = e' and set the
5230 register's tag to indicate the register is full. The previous
5231 state of the register is not checked. */
5233 static void put_ST_UNCHECKED ( Int i
, IRExpr
* value
)
5236 vassert(typeOfIRExpr(irsb
->tyenv
, value
) == Ity_F64
);
5237 descr
= mkIRRegArray( OFFB_FPREGS
, Ity_F64
, 8 );
5238 stmt( IRStmt_PutI( mkIRPutI(descr
, get_ftop(), i
, value
) ) );
5239 /* Mark the register as in-use. */
5240 put_ST_TAG(i
, mkU8(1));
5243 /* Given i, and some expression e, emit
5244 ST(i) = is_full(i) ? NaN : e
5245 and set the tag accordingly.
5248 static void put_ST ( Int i
, IRExpr
* value
)
5252 IRExpr_ITE( binop(Iop_CmpNE8
, get_ST_TAG(i
), mkU8(0)),
5253 /* non-0 means full */
5262 /* Given i, generate an expression yielding 'ST(i)'. */
5264 static IRExpr
* get_ST_UNCHECKED ( Int i
)
5266 IRRegArray
* descr
= mkIRRegArray( OFFB_FPREGS
, Ity_F64
, 8 );
5267 return IRExpr_GetI( descr
, get_ftop(), i
);
5271 /* Given i, generate an expression yielding
5272 is_full(i) ? ST(i) : NaN
5275 static IRExpr
* get_ST ( Int i
)
5278 IRExpr_ITE( binop(Iop_CmpNE8
, get_ST_TAG(i
), mkU8(0)),
5279 /* non-0 means full */
5280 get_ST_UNCHECKED(i
),
5286 /* Given i, and some expression e, and a condition cond, generate IR
5287 which has the same effect as put_ST(i,e) when cond is true and has
5288 no effect when cond is false. Given the lack of proper
5289 if-then-else in the IR, this is pretty tricky.
5292 static void maybe_put_ST ( IRTemp cond
, Int i
, IRExpr
* value
)
5294 // new_tag = if cond then FULL else old_tag
5295 // new_val = if cond then (if old_tag==FULL then NaN else val)
5298 IRTemp old_tag
= newTemp(Ity_I8
);
5299 assign(old_tag
, get_ST_TAG(i
));
5300 IRTemp new_tag
= newTemp(Ity_I8
);
5302 IRExpr_ITE(mkexpr(cond
), mkU8(1)/*FULL*/, mkexpr(old_tag
)));
5304 IRTemp old_val
= newTemp(Ity_F64
);
5305 assign(old_val
, get_ST_UNCHECKED(i
));
5306 IRTemp new_val
= newTemp(Ity_F64
);
5308 IRExpr_ITE(mkexpr(cond
),
5309 IRExpr_ITE(binop(Iop_CmpNE8
, mkexpr(old_tag
), mkU8(0)),
5310 /* non-0 means full */
5316 put_ST_UNCHECKED(i
, mkexpr(new_val
));
5317 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So
5318 // now set it to new_tag instead.
5319 put_ST_TAG(i
, mkexpr(new_tag
));
5322 /* Adjust FTOP downwards by one register. */
5324 static void fp_push ( void )
5326 put_ftop( binop(Iop_Sub32
, get_ftop(), mkU32(1)) );
5329 /* Adjust FTOP downwards by one register when COND is 1:I1. Else
5332 static void maybe_fp_push ( IRTemp cond
)
5334 put_ftop( binop(Iop_Sub32
, get_ftop(), unop(Iop_1Uto32
,mkexpr(cond
))) );
5337 /* Adjust FTOP upwards by one register, and mark the vacated register
5340 static void fp_pop ( void )
5342 put_ST_TAG(0, mkU8(0));
5343 put_ftop( binop(Iop_Add32
, get_ftop(), mkU32(1)) );
5346 /* Set the C2 bit of the FPU status register to e[0]. Assumes that
5349 static void set_C2 ( IRExpr
* e
)
5351 IRExpr
* cleared
= binop(Iop_And64
, get_C3210(), mkU64(~AMD64G_FC_MASK_C2
));
5352 put_C3210( binop(Iop_Or64
,
5354 binop(Iop_Shl64
, e
, mkU8(AMD64G_FC_SHIFT_C2
))) );
5357 /* Generate code to check that abs(d64) < 2^63 and is finite. This is
5358 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The
5359 test is simple, but the derivation of it is not so simple.
5361 The exponent field for an IEEE754 double is 11 bits. That means it
5362 can take values 0 through 0x7FF. If the exponent has value 0x7FF,
5363 the number is either a NaN or an Infinity and so is not finite.
5364 Furthermore, a finite value of exactly 2^63 is the smallest value
5365 that has exponent value 0x43E. Hence, what we need to do is
5366 extract the exponent, ignoring the sign bit and mantissa, and check
5367 it is < 0x43E, or <= 0x43D.
5369 To make this easily applicable to 32- and 64-bit targets, a
5370 roundabout approach is used. First the number is converted to I64,
5371 then the top 32 bits are taken. Shifting them right by 20 bits
5372 places the sign bit and exponent in the bottom 12 bits. Anding
5373 with 0x7FF gets rid of the sign bit, leaving just the exponent
5374 available for comparison.
5376 static IRTemp
math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64
)
5378 IRTemp i64
= newTemp(Ity_I64
);
5379 assign(i64
, unop(Iop_ReinterpF64asI64
, mkexpr(d64
)) );
5380 IRTemp exponent
= newTemp(Ity_I32
);
5383 binop(Iop_Shr32
, unop(Iop_64HIto32
, mkexpr(i64
)), mkU8(20)),
   IRTemp in_range_and_finite = newTemp(Ity_I1);
   assign(in_range_and_finite,
          binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D)));
   return in_range_and_finite;
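
/* Illustrative only (not compiled): the same exponent test performed directly
   on the bits of a C double.  abs(d) is finite and < 2^63 exactly when the
   biased exponent field is <= 0x43D (2^63 has biased exponent 0x43E; NaN and
   infinity have 0x7FF).  Names invented for this sketch. */
#if 0
static Bool trig_arg_in_range_model ( double d )
{
   union { double f; ULong i; } u;               /* ReinterpF64asI64 */
   u.f = d;
   UInt exponent = (UInt)(u.i >> 52) & 0x7FF;    /* drop sign and mantissa */
   return exponent <= 0x43D ? True : False;
}
#endif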
5391 /* Invent a plausible-looking FPU status word value:
5392 ((ftop & 7) << 11) | (c3210 & 0x4700)
5394 static IRExpr
* get_FPU_sw ( void )
5400 binop(Iop_And32
, get_ftop(), mkU32(7)),
5402 binop(Iop_And32
, unop(Iop_64to32
, get_C3210()),
5408 /* Generate a dirty helper call that initialises the x87 state a la
5409 FINIT. If |guard| is NULL, it is done unconditionally. Otherwise
5410 |guard| is used as a guarding condition.
5412 static void gen_FINIT_SEQUENCE ( IRExpr
* guard
)
5414 /* Uses dirty helper:
5415 void amd64g_do_FINIT ( VexGuestAMD64State* ) */
5416 IRDirty
* d
= unsafeIRDirty_0_N (
5418 "amd64g_dirtyhelper_FINIT",
5419 &amd64g_dirtyhelper_FINIT
,
5420 mkIRExprVec_1( IRExpr_GSPTR() )
5423 /* declare we're writing guest state */
5425 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
5427 d
->fxState
[0].fx
= Ifx_Write
;
5428 d
->fxState
[0].offset
= OFFB_FTOP
;
5429 d
->fxState
[0].size
= sizeof(UInt
);
5431 d
->fxState
[1].fx
= Ifx_Write
;
5432 d
->fxState
[1].offset
= OFFB_FPREGS
;
5433 d
->fxState
[1].size
= 8 * sizeof(ULong
);
5435 d
->fxState
[2].fx
= Ifx_Write
;
5436 d
->fxState
[2].offset
= OFFB_FPTAGS
;
5437 d
->fxState
[2].size
= 8 * sizeof(UChar
);
5439 d
->fxState
[3].fx
= Ifx_Write
;
5440 d
->fxState
[3].offset
= OFFB_FPROUND
;
5441 d
->fxState
[3].size
= sizeof(ULong
);
5443 d
->fxState
[4].fx
= Ifx_Write
;
5444 d
->fxState
[4].offset
= OFFB_FC3210
;
5445 d
->fxState
[4].size
= sizeof(ULong
);
5450 stmt( IRStmt_Dirty(d
) );
5454 /* ------------------------------------------------------- */
5455 /* Given all that stack-mangling junk, we can now go ahead
5456 and describe FP instructions.
5459 /* ST(0) = ST(0) `op` mem64/32(addr)
5460 Need to check ST(0)'s tag on read, but not on write.
5463 void fp_do_op_mem_ST_0 ( IRTemp addr
, const HChar
* op_txt
, HChar
* dis_buf
,
5466 DIP("f%s%c %s\n", op_txt
, dbl
?'l':'s', dis_buf
);
5470 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5472 loadLE(Ity_F64
,mkexpr(addr
))
5477 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5479 unop(Iop_F32toF64
, loadLE(Ity_F32
,mkexpr(addr
)))
5485 /* ST(0) = mem64/32(addr) `op` ST(0)
5486 Need to check ST(0)'s tag on read, but not on write.
5489 void fp_do_oprev_mem_ST_0 ( IRTemp addr
, const HChar
* op_txt
, HChar
* dis_buf
,
5492 DIP("f%s%c %s\n", op_txt
, dbl
?'l':'s', dis_buf
);
5496 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5497 loadLE(Ity_F64
,mkexpr(addr
)),
5503 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5504 unop(Iop_F32toF64
, loadLE(Ity_F32
,mkexpr(addr
))),
5511 /* ST(dst) = ST(dst) `op` ST(src).
5512 Check dst and src tags when reading but not on write.
5515 void fp_do_op_ST_ST ( const HChar
* op_txt
, IROp op
, UInt st_src
, UInt st_dst
,
5518 DIP("f%s%s st(%u), st(%u)\n", op_txt
, pop_after
?"p":"", st_src
, st_dst
);
5522 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5530 /* ST(dst) = ST(src) `op` ST(dst).
5531 Check dst and src tags when reading but not on write.
5534 void fp_do_oprev_ST_ST ( const HChar
* op_txt
, IROp op
, UInt st_src
, UInt st_dst
,
5537 DIP("f%s%s st(%u), st(%u)\n", op_txt
, pop_after
?"p":"", st_src
, st_dst
);
5541 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5549 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */
5550 static void fp_do_ucomi_ST0_STi ( UInt i
, Bool pop_after
)
5552 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after
? "p" : "", i
);
5553 /* This is a bit of a hack (and isn't really right). It sets
5554 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
5555 documentation implies A and S are unchanged.
5557 /* It's also fishy in that it is used both for COMIP and
5558 UCOMIP, and they aren't the same (although similar). */
5559 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
5560 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
5565 binop(Iop_CmpF64
, get_ST(0), get_ST(i
))),
5568 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
5575 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 )
5577 static IRExpr
* x87ishly_qnarrow_32_to_16 ( IRExpr
* e32
)
5579 IRTemp t32
= newTemp(Ity_I32
);
5585 binop(Iop_Add32
, mkexpr(t32
), mkU32(32768))),
5587 unop(Iop_32to16
, mkexpr(t32
)),
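
/* Illustrative only (not compiled): the saturation that
   x87ishly_qnarrow_32_to_16 describes, in plain C.  Out-of-range values clamp
   to -32768 (0x8000), matching the x87 16-bit integer-indefinite encoding.
   The name is invented for this sketch. */
#if 0
static Short qnarrow_32_to_16_model ( Int e32 )
{
   if (e32 < -32768 || e32 > 32767) return (Short)-32768;
   return (Short)e32;
}
#endif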
5593 ULong
dis_FPU ( /*OUT*/Bool
* decode_ok
,
5594 const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
)
5601 /* On entry, delta points at the second byte of the insn (the modrm
5603 UChar first_opcode
= getUChar(delta
-1);
5604 UChar modrm
= getUChar(delta
+0);
5606 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
5608 if (first_opcode
== 0xD8) {
5611 /* bits 5,4,3 are an opcode extension, and the modRM also
5612 specifies an address. */
5613 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
5616 switch (gregLO3ofRM(modrm
)) {
5618 case 0: /* FADD single-real */
5619 fp_do_op_mem_ST_0 ( addr
, "add", dis_buf
, Iop_AddF64
, False
);
5622 case 1: /* FMUL single-real */
5623 fp_do_op_mem_ST_0 ( addr
, "mul", dis_buf
, Iop_MulF64
, False
);
5626 case 2: /* FCOM single-real */
5627 DIP("fcoms %s\n", dis_buf
);
5628 /* This forces C1 to zero, which isn't right. */
5629 /* The AMD documentation suggests that forcing C1 to
5630 zero is correct (Eliot Moss) */
5638 loadLE(Ity_F32
,mkexpr(addr
)))),
5644 case 3: /* FCOMP single-real */
5645 /* The AMD documentation suggests that forcing C1 to
5646 zero is correct (Eliot Moss) */
5647 DIP("fcomps %s\n", dis_buf
);
5648 /* This forces C1 to zero, which isn't right. */
5656 loadLE(Ity_F32
,mkexpr(addr
)))),
5663 case 4: /* FSUB single-real */
5664 fp_do_op_mem_ST_0 ( addr
, "sub", dis_buf
, Iop_SubF64
, False
);
5667 case 5: /* FSUBR single-real */
5668 fp_do_oprev_mem_ST_0 ( addr
, "subr", dis_buf
, Iop_SubF64
, False
);
5671 case 6: /* FDIV single-real */
5672 fp_do_op_mem_ST_0 ( addr
, "div", dis_buf
, Iop_DivF64
, False
);
5675 case 7: /* FDIVR single-real */
5676 fp_do_oprev_mem_ST_0 ( addr
, "divr", dis_buf
, Iop_DivF64
, False
);
5680 vex_printf("unhandled opc_aux = 0x%2x\n",
5681 (UInt
)gregLO3ofRM(modrm
));
5682 vex_printf("first_opcode == 0xD8\n");
5689 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
5690 fp_do_op_ST_ST ( "add", Iop_AddF64
, modrm
- 0xC0, 0, False
);
5693 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
5694 fp_do_op_ST_ST ( "mul", Iop_MulF64
, modrm
- 0xC8, 0, False
);
5697 /* Dunno if this is right */
5698 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
5699 r_dst
= (UInt
)modrm
- 0xD0;
5700 DIP("fcom %%st(0),%%st(%u)\n", r_dst
);
5701 /* This forces C1 to zero, which isn't right. */
5706 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
5712 /* Dunno if this is right */
5713 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
5714 r_dst
= (UInt
)modrm
- 0xD8;
5715 DIP("fcomp %%st(0),%%st(%u)\n", r_dst
);
5716 /* This forces C1 to zero, which isn't right. */
5721 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
5728 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
5729 fp_do_op_ST_ST ( "sub", Iop_SubF64
, modrm
- 0xE0, 0, False
);
5732 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
5733 fp_do_oprev_ST_ST ( "subr", Iop_SubF64
, modrm
- 0xE8, 0, False
);
5736 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
5737 fp_do_op_ST_ST ( "div", Iop_DivF64
, modrm
- 0xF0, 0, False
);
5740 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
5741 fp_do_oprev_ST_ST ( "divr", Iop_DivF64
, modrm
- 0xF8, 0, False
);
5750 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
5752 if (first_opcode
== 0xD9) {
5755 /* bits 5,4,3 are an opcode extension, and the modRM also
5756 specifies an address. */
5757 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
5760 switch (gregLO3ofRM(modrm
)) {
5762 case 0: /* FLD single-real */
5763 DIP("flds %s\n", dis_buf
);
5765 put_ST(0, unop(Iop_F32toF64
,
5766 loadLE(Ity_F32
, mkexpr(addr
))));
5769 case 2: /* FST single-real */
5770 DIP("fsts %s\n", dis_buf
);
5771 storeLE(mkexpr(addr
),
5772 binop(Iop_F64toF32
, get_roundingmode(), get_ST(0)));
5775 case 3: /* FSTP single-real */
5776 DIP("fstps %s\n", dis_buf
);
5777 storeLE(mkexpr(addr
),
5778 binop(Iop_F64toF32
, get_roundingmode(), get_ST(0)));
5782 case 4: { /* FLDENV m28 */
5783 /* Uses dirty helper:
5784 VexEmNote amd64g_do_FLDENV ( VexGuestX86State*, HWord ) */
5785 IRTemp ew
= newTemp(Ity_I32
);
5786 IRTemp w64
= newTemp(Ity_I64
);
5787 IRDirty
* d
= unsafeIRDirty_0_N (
5789 "amd64g_dirtyhelper_FLDENV",
5790 &amd64g_dirtyhelper_FLDENV
,
5791 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
5794 /* declare we're reading memory */
5796 d
->mAddr
= mkexpr(addr
);
5799 /* declare we're writing guest state */
5801 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
5803 d
->fxState
[0].fx
= Ifx_Write
;
5804 d
->fxState
[0].offset
= OFFB_FTOP
;
5805 d
->fxState
[0].size
= sizeof(UInt
);
5807 d
->fxState
[1].fx
= Ifx_Write
;
5808 d
->fxState
[1].offset
= OFFB_FPTAGS
;
5809 d
->fxState
[1].size
= 8 * sizeof(UChar
);
5811 d
->fxState
[2].fx
= Ifx_Write
;
5812 d
->fxState
[2].offset
= OFFB_FPROUND
;
5813 d
->fxState
[2].size
= sizeof(ULong
);
5815 d
->fxState
[3].fx
= Ifx_Write
;
5816 d
->fxState
[3].offset
= OFFB_FC3210
;
5817 d
->fxState
[3].size
= sizeof(ULong
);
5819 stmt( IRStmt_Dirty(d
) );
5821 /* ew contains any emulation warning we may need to
5822 issue. If needed, side-exit to the next insn,
5823 reporting the warning, so that Valgrind's dispatcher
5824 sees the warning. */
5825 assign(ew
, unop(Iop_64to32
,mkexpr(w64
)) );
5826 put_emwarn( mkexpr(ew
) );
5829 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
5831 IRConst_U64( guest_RIP_bbstart
+delta
),
5836 DIP("fldenv %s\n", dis_buf
);
5840 case 5: {/* FLDCW */
5841 /* The only thing we observe in the control word is the
5842 rounding mode. Therefore, pass the 16-bit value
5843 (x87 native-format control word) to a clean helper,
5844 getting back a 64-bit value, the lower half of which
5845 is the FPROUND value to store, and the upper half of
5846 which is the emulation-warning token which may be
5849 /* ULong amd64h_check_fldcw ( ULong ); */
5850 IRTemp t64
= newTemp(Ity_I64
);
5851 IRTemp ew
= newTemp(Ity_I32
);
5852 DIP("fldcw %s\n", dis_buf
);
5853 assign( t64
, mkIRExprCCall(
5854 Ity_I64
, 0/*regparms*/,
5855 "amd64g_check_fldcw",
5856 &amd64g_check_fldcw
,
5859 loadLE(Ity_I16
, mkexpr(addr
)))
5864 put_fpround( unop(Iop_64to32
, mkexpr(t64
)) );
5865 assign( ew
, unop(Iop_64HIto32
, mkexpr(t64
) ) );
5866 put_emwarn( mkexpr(ew
) );
5867 /* Finally, if an emulation warning was reported,
5868 side-exit to the next insn, reporting the warning,
5869 so that Valgrind's dispatcher sees the warning. */
5872 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
5874 IRConst_U64( guest_RIP_bbstart
+delta
),
5881 case 6: { /* FNSTENV m28 */
5882 /* Uses dirty helper:
5883 void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */
5884 IRDirty
* d
= unsafeIRDirty_0_N (
5886 "amd64g_dirtyhelper_FSTENV",
5887 &amd64g_dirtyhelper_FSTENV
,
5888 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
5890 /* declare we're writing memory */
5892 d
->mAddr
= mkexpr(addr
);
5895 /* declare we're reading guest state */
5897 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
5899 d
->fxState
[0].fx
= Ifx_Read
;
5900 d
->fxState
[0].offset
= OFFB_FTOP
;
5901 d
->fxState
[0].size
= sizeof(UInt
);
5903 d
->fxState
[1].fx
= Ifx_Read
;
5904 d
->fxState
[1].offset
= OFFB_FPTAGS
;
5905 d
->fxState
[1].size
= 8 * sizeof(UChar
);
5907 d
->fxState
[2].fx
= Ifx_Read
;
5908 d
->fxState
[2].offset
= OFFB_FPROUND
;
5909 d
->fxState
[2].size
= sizeof(ULong
);
5911 d
->fxState
[3].fx
= Ifx_Read
;
5912 d
->fxState
[3].offset
= OFFB_FC3210
;
5913 d
->fxState
[3].size
= sizeof(ULong
);
5915 stmt( IRStmt_Dirty(d
) );
5917 DIP("fnstenv %s\n", dis_buf
);
5921 case 7: /* FNSTCW */
5922 /* Fake up a native x87 FPU control word. The only
5923 thing it depends on is FPROUND[1:0], so call a clean
5924 helper to cook it up. */
5925 /* ULong amd64g_create_fpucw ( ULong fpround ) */
5926 DIP("fnstcw %s\n", dis_buf
);
5932 "amd64g_create_fpucw", &amd64g_create_fpucw
,
5933 mkIRExprVec_1( unop(Iop_32Uto64
, get_fpround()) )
5940 vex_printf("unhandled opc_aux = 0x%2x\n",
5941 (UInt
)gregLO3ofRM(modrm
));
5942 vex_printf("first_opcode == 0xD9\n");
5950 case 0xC0 ... 0xC7: /* FLD %st(?) */
5951 r_src
= (UInt
)modrm
- 0xC0;
5952 DIP("fld %%st(%u)\n", r_src
);
5953 t1
= newTemp(Ity_F64
);
5954 assign(t1
, get_ST(r_src
));
5956 put_ST(0, mkexpr(t1
));
5959 case 0xC8 ... 0xCF: /* FXCH %st(?) */
5960 r_src
= (UInt
)modrm
- 0xC8;
5961 DIP("fxch %%st(%u)\n", r_src
);
5962 t1
= newTemp(Ity_F64
);
5963 t2
= newTemp(Ity_F64
);
5964 assign(t1
, get_ST(0));
5965 assign(t2
, get_ST(r_src
));
5966 put_ST_UNCHECKED(0, mkexpr(t2
));
5967 put_ST_UNCHECKED(r_src
, mkexpr(t1
));
5970 case 0xE0: /* FCHS */
5972 put_ST_UNCHECKED(0, unop(Iop_NegF64
, get_ST(0)));
5975 case 0xE1: /* FABS */
5977 put_ST_UNCHECKED(0, unop(Iop_AbsF64
, get_ST(0)));
5980 case 0xE5: { /* FXAM */
5981 /* This is an interesting one. It examines %st(0),
5982 regardless of whether the tag says it's empty or not.
5983 Here, just pass both the tag (in our format) and the
5984 value (as a double, actually a ULong) to a helper
5987 = mkIRExprVec_2( unop(Iop_8Uto64
, get_ST_TAG(0)),
5988 unop(Iop_ReinterpF64asI64
,
5989 get_ST_UNCHECKED(0)) );
5990 put_C3210(mkIRExprCCall(
5993 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM
,
6000 case 0xE8: /* FLD1 */
6003 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
6004 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL
)));
6007 case 0xE9: /* FLDL2T */
6010 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
6011 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL
)));
6014 case 0xEA: /* FLDL2E */
6017 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
6018 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL
)));
6021 case 0xEB: /* FLDPI */
6024 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
6025 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL
)));
6028 case 0xEC: /* FLDLG2 */
6031 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
6032 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL
)));
6035 case 0xED: /* FLDLN2 */
6038 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
6039 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL
)));
6042 case 0xEE: /* FLDZ */
6045 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
6046 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL
)));
6049 case 0xF0: /* F2XM1 */
6053 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6057 case 0xF1: /* FYL2X */
6061 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6067 case 0xF2: { /* FPTAN */
6069 IRTemp argD
= newTemp(Ity_F64
);
6070 assign(argD
, get_ST(0));
6071 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
6072 IRTemp resD
= newTemp(Ity_F64
);
6077 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6081 put_ST_UNCHECKED(0, mkexpr(resD
));
6082 /* Conditionally push 1.0 on the stack, if the arg is
6084 maybe_fp_push(argOK
);
6085 maybe_put_ST(argOK
, 0,
6086 IRExpr_Const(IRConst_F64(1.0)));
6087 set_C2( binop(Iop_Xor64
,
6088 unop(Iop_1Uto64
, mkexpr(argOK
)),
6093 case 0xF3: /* FPATAN */
6097 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6103 case 0xF4: { /* FXTRACT */
6104 IRTemp argF
= newTemp(Ity_F64
);
6105 IRTemp sigF
= newTemp(Ity_F64
);
6106 IRTemp expF
= newTemp(Ity_F64
);
6107 IRTemp argI
= newTemp(Ity_I64
);
6108 IRTemp sigI
= newTemp(Ity_I64
);
6109 IRTemp expI
= newTemp(Ity_I64
);
6111 assign( argF
, get_ST(0) );
6112 assign( argI
, unop(Iop_ReinterpF64asI64
, mkexpr(argF
)));
6115 Ity_I64
, 0/*regparms*/,
6116 "x86amd64g_calculate_FXTRACT",
6117 &x86amd64g_calculate_FXTRACT
,
6118 mkIRExprVec_2( mkexpr(argI
),
6119 mkIRExpr_HWord(0)/*sig*/ ))
6123 Ity_I64
, 0/*regparms*/,
6124 "x86amd64g_calculate_FXTRACT",
6125 &x86amd64g_calculate_FXTRACT
,
6126 mkIRExprVec_2( mkexpr(argI
),
6127 mkIRExpr_HWord(1)/*exp*/ ))
6129 assign( sigF
, unop(Iop_ReinterpI64asF64
, mkexpr(sigI
)) );
6130 assign( expF
, unop(Iop_ReinterpI64asF64
, mkexpr(expI
)) );
6132 put_ST_UNCHECKED(0, mkexpr(expF
) );
6135 put_ST(0, mkexpr(sigF
) );
6139 case 0xF5: { /* FPREM1 -- IEEE compliant */
6140 IRTemp a1
= newTemp(Ity_F64
);
6141 IRTemp a2
= newTemp(Ity_F64
);
6143 /* Do FPREM1 twice, once to get the remainder, and once
6144 to get the C3210 flag values. */
6145 assign( a1
, get_ST(0) );
6146 assign( a2
, get_ST(1) );
6149 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6154 triop(Iop_PRem1C3210F64
,
6155 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6161 case 0xF7: /* FINCSTP */
6163 put_ftop( binop(Iop_Add32
, get_ftop(), mkU32(1)) );
6166 case 0xF8: { /* FPREM -- not IEEE compliant */
6167 IRTemp a1
= newTemp(Ity_F64
);
6168 IRTemp a2
= newTemp(Ity_F64
);
6170 /* Do FPREM twice, once to get the remainder, and once
6171 to get the C3210 flag values. */
6172 assign( a1
, get_ST(0) );
6173 assign( a2
, get_ST(1) );
6176 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6181 triop(Iop_PRemC3210F64
,
6182 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6188 case 0xF9: /* FYL2XP1 */
6191 triop(Iop_Yl2xp1F64
,
6192 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6198 case 0xFA: /* FSQRT */
6202 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6206 case 0xFB: { /* FSINCOS */
6208 IRTemp argD
= newTemp(Ity_F64
);
6209 assign(argD
, get_ST(0));
6210 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
6211 IRTemp resD
= newTemp(Ity_F64
);
6216 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6220 put_ST_UNCHECKED(0, mkexpr(resD
));
6221 /* Conditionally push the cos value on the stack, if
6222 the arg is in range */
6223 maybe_fp_push(argOK
);
6224 maybe_put_ST(argOK
, 0,
6226 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6228 set_C2( binop(Iop_Xor64
,
6229 unop(Iop_1Uto64
, mkexpr(argOK
)),
6234 case 0xFC: /* FRNDINT */
6237 binop(Iop_RoundF64toInt
, get_roundingmode(), get_ST(0)) );
6240 case 0xFD: /* FSCALE */
6244 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6249 case 0xFE: /* FSIN */
6250 case 0xFF: { /* FCOS */
6251 Bool isSIN
= modrm
== 0xFE;
6252 DIP("%s\n", isSIN
? "fsin" : "fcos");
6253 IRTemp argD
= newTemp(Ity_F64
);
6254 assign(argD
, get_ST(0));
6255 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
6256 IRTemp resD
= newTemp(Ity_F64
);
6260 binop(isSIN
? Iop_SinF64
: Iop_CosF64
,
6261 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6265 put_ST_UNCHECKED(0, mkexpr(resD
));
6266 set_C2( binop(Iop_Xor64
,
6267 unop(Iop_1Uto64
, mkexpr(argOK
)),
6278 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
6280 if (first_opcode
== 0xDA) {
6284 /* bits 5,4,3 are an opcode extension, and the modRM also
6285 specifies an address. */
6287 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
6289 switch (gregLO3ofRM(modrm
)) {
6291 case 0: /* FIADD m32int */ /* ST(0) += m32int */
6292 DIP("fiaddl %s\n", dis_buf
);
6296 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
6297 DIP("fimull %s\n", dis_buf
);
6301 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
6302 DIP("fisubl %s\n", dis_buf
);
6306 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
6307 DIP("fisubrl %s\n", dis_buf
);
6311 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
6312 DIP("fisubl %s\n", dis_buf
);
6316 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
6317 DIP("fidivrl %s\n", dis_buf
);
6324 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6327 loadLE(Ity_I32
, mkexpr(addr
)))));
6333 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6335 loadLE(Ity_I32
, mkexpr(addr
))),
6340 vex_printf("unhandled opc_aux = 0x%2x\n",
6341 (UInt
)gregLO3ofRM(modrm
));
6342 vex_printf("first_opcode == 0xDA\n");
6351 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
6352 r_src
= (UInt
)modrm
- 0xC0;
6353 DIP("fcmovb %%st(%u), %%st(0)\n", r_src
);
6356 mk_amd64g_calculate_condition(AMD64CondB
),
6357 get_ST(r_src
), get_ST(0)) );
6360 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
6361 r_src
= (UInt
)modrm
- 0xC8;
6362 DIP("fcmovz %%st(%u), %%st(0)\n", r_src
);
6365 mk_amd64g_calculate_condition(AMD64CondZ
),
6366 get_ST(r_src
), get_ST(0)) );
6369 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
6370 r_src
= (UInt
)modrm
- 0xD0;
6371 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src
);
6374 mk_amd64g_calculate_condition(AMD64CondBE
),
6375 get_ST(r_src
), get_ST(0)) );
6378 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
6379 r_src
= (UInt
)modrm
- 0xD8;
6380 DIP("fcmovu %%st(%u), %%st(0)\n", r_src
);
6383 mk_amd64g_calculate_condition(AMD64CondP
),
6384 get_ST(r_src
), get_ST(0)) );
6387 case 0xE9: /* FUCOMPP %st(0),%st(1) */
6388 DIP("fucompp %%st(0),%%st(1)\n");
6389 /* This forces C1 to zero, which isn't right. */
6394 binop(Iop_CmpF64
, get_ST(0), get_ST(1)),
6409 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
6411 if (first_opcode
== 0xDB) {
6414 /* bits 5,4,3 are an opcode extension, and the modRM also
6415 specifies an address. */
6416 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
6419 switch (gregLO3ofRM(modrm
)) {
6421 case 0: /* FILD m32int */
6422 DIP("fildl %s\n", dis_buf
);
6424 put_ST(0, unop(Iop_I32StoF64
,
6425 loadLE(Ity_I32
, mkexpr(addr
))));
6428 case 1: /* FISTTPL m32 (SSE3) */
6429 DIP("fisttpl %s\n", dis_buf
);
6430 storeLE( mkexpr(addr
),
6431 binop(Iop_F64toI32S
, mkU32(Irrm_ZERO
), get_ST(0)) );
6435 case 2: /* FIST m32 */
6436 DIP("fistl %s\n", dis_buf
);
6437 storeLE( mkexpr(addr
),
6438 binop(Iop_F64toI32S
, get_roundingmode(), get_ST(0)) );
6441 case 3: /* FISTP m32 */
6442 DIP("fistpl %s\n", dis_buf
);
6443 storeLE( mkexpr(addr
),
6444 binop(Iop_F64toI32S
, get_roundingmode(), get_ST(0)) );
6448 case 5: { /* FLD extended-real */
6449 /* Uses dirty helper:
6450 ULong amd64g_loadF80le ( ULong )
6451 addr holds the address. First, do a dirty call to
6452 get hold of the data. */
6453 IRTemp val
= newTemp(Ity_I64
);
6454 IRExpr
** args
= mkIRExprVec_1 ( mkexpr(addr
) );
6456 IRDirty
* d
= unsafeIRDirty_1_N (
6459 "amd64g_dirtyhelper_loadF80le",
6460 &amd64g_dirtyhelper_loadF80le
,
6463 /* declare that we're reading memory */
6465 d
->mAddr
= mkexpr(addr
);
6468 /* execute the dirty call, dumping the result in val. */
6469 stmt( IRStmt_Dirty(d
) );
6471 put_ST(0, unop(Iop_ReinterpI64asF64
, mkexpr(val
)));
6473 DIP("fldt %s\n", dis_buf
);
6477 case 7: { /* FSTP extended-real */
6478 /* Uses dirty helper:
6479 void amd64g_storeF80le ( ULong addr, ULong data )
6482 = mkIRExprVec_2( mkexpr(addr
),
6483 unop(Iop_ReinterpF64asI64
, get_ST(0)) );
6485 IRDirty
* d
= unsafeIRDirty_0_N (
6487 "amd64g_dirtyhelper_storeF80le",
6488 &amd64g_dirtyhelper_storeF80le
,
6491 /* declare we're writing memory */
6493 d
->mAddr
= mkexpr(addr
);
6496 /* execute the dirty call. */
6497 stmt( IRStmt_Dirty(d
) );
6500 DIP("fstpt\n %s", dis_buf
);
6505 vex_printf("unhandled opc_aux = 0x%2x\n",
6506 (UInt
)gregLO3ofRM(modrm
));
6507 vex_printf("first_opcode == 0xDB\n");
6516 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
6517 r_src
= (UInt
)modrm
- 0xC0;
6518 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src
);
6521 mk_amd64g_calculate_condition(AMD64CondNB
),
6522 get_ST(r_src
), get_ST(0)) );
6525 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
6526 r_src
= (UInt
)modrm
- 0xC8;
6527 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src
);
6531 mk_amd64g_calculate_condition(AMD64CondNZ
),
6538 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
6539 r_src
= (UInt
)modrm
- 0xD0;
6540 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src
);
6544 mk_amd64g_calculate_condition(AMD64CondNBE
),
6551 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
6552 r_src
= (UInt
)modrm
- 0xD8;
6553 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src
);
6557 mk_amd64g_calculate_condition(AMD64CondNP
),
6569 gen_FINIT_SEQUENCE(NULL
/*no guarding condition*/);
6574 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
6575 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xE8, False
);
6578 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
6579 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xF0, False
);
      /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */

      if (first_opcode == 0xDC) {
         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );

         switch (gregLO3ofRM(modrm)) {

            case 0: /* FADD double-real */
               fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );

            case 1: /* FMUL double-real */
               fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );

            case 2: /* FCOM double-real */
               DIP("fcoml %s\n", dis_buf);
               /* This forces C1 to zero, which isn't right. */
                                   loadLE(Ity_F64,mkexpr(addr))),

            case 3: /* FCOMP double-real */
               DIP("fcompl %s\n", dis_buf);
               /* This forces C1 to zero, which isn't right. */
                                   loadLE(Ity_F64,mkexpr(addr))),

            case 4: /* FSUB double-real */
               fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );

            case 5: /* FSUBR double-real */
               fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );

            case 6: /* FDIV double-real */
               fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );

            case 7: /* FDIVR double-real */
               fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );

               vex_printf("unhandled opc_aux = 0x%2x\n",
                          (UInt)gregLO3ofRM(modrm));
               vex_printf("first_opcode == 0xDC\n");
            case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
               fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );

            case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
               fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );

            case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
               fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );

            case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
               fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );

            case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
               fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );

            case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
               fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
      /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */

      if (first_opcode == 0xDD) {
         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );

         switch (gregLO3ofRM(modrm)) {

            case 0: /* FLD double-real */
               DIP("fldl %s\n", dis_buf);
               put_ST(0, loadLE(Ity_F64, mkexpr(addr)));

            case 1: /* FISTTPQ m64 (SSE3) */
               DIP("fisttpll %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );

            case 2: /* FST double-real */
               DIP("fstl %s\n", dis_buf);
               storeLE(mkexpr(addr), get_ST(0));

            case 3: /* FSTP double-real */
               DIP("fstpl %s\n", dis_buf);
               storeLE(mkexpr(addr), get_ST(0));
            case 4: { /* FRSTOR m94/m108 */
               IRTemp ew  = newTemp(Ity_I32);
               IRTemp w64 = newTemp(Ity_I64);

               if ( have66(pfx) ) {
                  /* Uses dirty helper:
                        VexEmNote amd64g_dirtyhelper_FRSTORS
                           ( VexGuestAMD64State*, HWord ) */
                  d = unsafeIRDirty_0_N (
                         "amd64g_dirtyhelper_FRSTORS",
                         &amd64g_dirtyhelper_FRSTORS,
                         mkIRExprVec_1( mkexpr(addr) )
               } else {
                  /* Uses dirty helper:
                        VexEmNote amd64g_dirtyhelper_FRSTOR
                           ( VexGuestAMD64State*, HWord ) */
                  d = unsafeIRDirty_0_N (
                         "amd64g_dirtyhelper_FRSTOR",
                         &amd64g_dirtyhelper_FRSTOR,
                         mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
               }

               /* declare we're reading memory */
               d->mAddr = mkexpr(addr);
               /* d->mSize set above */

               /* declare we're writing guest state */
               vex_bzero(&d->fxState, sizeof(d->fxState));

               d->fxState[0].fx     = Ifx_Write;
               d->fxState[0].offset = OFFB_FTOP;
               d->fxState[0].size   = sizeof(UInt);

               d->fxState[1].fx     = Ifx_Write;
               d->fxState[1].offset = OFFB_FPREGS;
               d->fxState[1].size   = 8 * sizeof(ULong);

               d->fxState[2].fx     = Ifx_Write;
               d->fxState[2].offset = OFFB_FPTAGS;
               d->fxState[2].size   = 8 * sizeof(UChar);

               d->fxState[3].fx     = Ifx_Write;
               d->fxState[3].offset = OFFB_FPROUND;
               d->fxState[3].size   = sizeof(ULong);

               d->fxState[4].fx     = Ifx_Write;
               d->fxState[4].offset = OFFB_FC3210;
               d->fxState[4].size   = sizeof(ULong);

               stmt( IRStmt_Dirty(d) );

               /* ew contains any emulation warning we may need to
                  issue.  If needed, side-exit to the next insn,
                  reporting the warning, so that Valgrind's dispatcher
                  sees the warning. */
               assign(ew, unop(Iop_64to32,mkexpr(w64)) );
               put_emwarn( mkexpr(ew) );
                         binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
                         IRConst_U64( guest_RIP_bbstart+delta ),

               if ( have66(pfx) ) {
                  DIP("frstors %s\n", dis_buf);
               } else {
                  DIP("frstor %s\n", dis_buf);
               }
            case 6: { /* FNSAVE m94/m108 */
               if ( have66(pfx) ) {
                  /* Uses dirty helper:
                        void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*,
                                                          HWord ) */
                  d = unsafeIRDirty_0_N (
                         "amd64g_dirtyhelper_FNSAVES",
                         &amd64g_dirtyhelper_FNSAVES,
                         mkIRExprVec_1( mkexpr(addr) )
               } else {
                  /* Uses dirty helper:
                        void amd64g_dirtyhelper_FNSAVE ( VexGuestAMD64State*,
                                                         HWord ) */
                  d = unsafeIRDirty_0_N (
                         "amd64g_dirtyhelper_FNSAVE",
                         &amd64g_dirtyhelper_FNSAVE,
                         mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
               }

               /* declare we're writing memory */
               d->mAddr = mkexpr(addr);
               /* d->mSize set above */

               /* declare we're reading guest state */
               vex_bzero(&d->fxState, sizeof(d->fxState));

               d->fxState[0].fx     = Ifx_Read;
               d->fxState[0].offset = OFFB_FTOP;
               d->fxState[0].size   = sizeof(UInt);

               d->fxState[1].fx     = Ifx_Read;
               d->fxState[1].offset = OFFB_FPREGS;
               d->fxState[1].size   = 8 * sizeof(ULong);

               d->fxState[2].fx     = Ifx_Read;
               d->fxState[2].offset = OFFB_FPTAGS;
               d->fxState[2].size   = 8 * sizeof(UChar);

               d->fxState[3].fx     = Ifx_Read;
               d->fxState[3].offset = OFFB_FPROUND;
               d->fxState[3].size   = sizeof(ULong);

               d->fxState[4].fx     = Ifx_Read;
               d->fxState[4].offset = OFFB_FC3210;
               d->fxState[4].size   = sizeof(ULong);

               stmt( IRStmt_Dirty(d) );

               if ( have66(pfx) ) {
                  DIP("fnsaves %s\n", dis_buf);
               } else {
                  DIP("fnsave %s\n", dis_buf);
               }
            case 7: { /* FNSTSW m16 */
               IRExpr* sw = get_FPU_sw();
               vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
               storeLE( mkexpr(addr), sw );
               DIP("fnstsw %s\n", dis_buf);

               vex_printf("unhandled opc_aux = 0x%2x\n",
                          (UInt)gregLO3ofRM(modrm));
               vex_printf("first_opcode == 0xDD\n");
            case 0xC0 ... 0xC7: /* FFREE %st(?) */
               r_dst = (UInt)modrm - 0xC0;
               DIP("ffree %%st(%u)\n", r_dst);
               put_ST_TAG ( r_dst, mkU8(0) );

            case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xD0;
               DIP("fst %%st(0),%%st(%u)\n", r_dst);
               /* P4 manual says: "If the destination operand is a
                  non-empty register, the invalid-operation exception
                  is not generated."  Hence put_ST_UNCHECKED. */
               put_ST_UNCHECKED(r_dst, get_ST(0));

            case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xD8;
               DIP("fstp %%st(0),%%st(%u)\n", r_dst);
               /* P4 manual says: "If the destination operand is a
                  non-empty register, the invalid-operation exception
                  is not generated."  Hence put_ST_UNCHECKED. */
               put_ST_UNCHECKED(r_dst, get_ST(0));

            case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xE0;
               DIP("fucom %%st(0),%%st(%u)\n", r_dst);
               /* This forces C1 to zero, which isn't right. */
                            binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),

            case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
               r_dst = (UInt)modrm - 0xE8;
               DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
               /* This forces C1 to zero, which isn't right. */
                            binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
      /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */

      if (first_opcode == 0xDE) {
         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );

         switch (gregLO3ofRM(modrm)) {

            case 0: /* FIADD m16int */ /* ST(0) += m16int */
               DIP("fiaddw %s\n", dis_buf);

            case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
               DIP("fimulw %s\n", dis_buf);

            case 4: /* FISUB m16int */ /* ST(0) -= m16int */
               DIP("fisubw %s\n", dis_buf);

            case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
               DIP("fisubrw %s\n", dis_buf);

            case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
               DIP("fidivw %s\n", dis_buf);

            case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
               DIP("fidivrw %s\n", dis_buf);

                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                              loadLE(Ity_I16, mkexpr(addr))))));

                        get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                              loadLE(Ity_I16, mkexpr(addr)))),

               vex_printf("unhandled opc_aux = 0x%2x\n",
                          (UInt)gregLO3ofRM(modrm));
               vex_printf("first_opcode == 0xDE\n");
            case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
               fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );

            case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
               fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );

            case 0xD9: /* FCOMPP %st(0),%st(1) */
               DIP("fcompp %%st(0),%%st(1)\n");
               /* This forces C1 to zero, which isn't right. */
                            binop(Iop_CmpF64, get_ST(0), get_ST(1)),

            case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
               fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );

            case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
               fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );

            case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
               fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );

            case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
               fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
      /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */

      if (first_opcode == 0xDF) {
         /* bits 5,4,3 are an opcode extension, and the modRM also
            specifies an address. */
         IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );

         switch (gregLO3ofRM(modrm)) {

            case 0: /* FILD m16int */
               DIP("fildw %s\n", dis_buf);
               put_ST(0, unop(Iop_I32StoF64,
                              loadLE(Ity_I16, mkexpr(addr)))));

            case 1: /* FISTTPS m16 (SSE3) */
               DIP("fisttps %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        x87ishly_qnarrow_32_to_16(
                           binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ));

            case 2: /* FIST m16 */
               DIP("fists %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        x87ishly_qnarrow_32_to_16(
                           binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));

            case 3: /* FISTP m16 */
               DIP("fistps %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        x87ishly_qnarrow_32_to_16(
                           binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));

            case 5: /* FILD m64 */
               DIP("fildll %s\n", dis_buf);
               put_ST(0, binop(Iop_I64StoF64,
                               loadLE(Ity_I64, mkexpr(addr))));

            case 7: /* FISTP m64 */
               DIP("fistpll %s\n", dis_buf);
               storeLE( mkexpr(addr),
                        binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );

               vex_printf("unhandled opc_aux = 0x%2x\n",
                          (UInt)gregLO3ofRM(modrm));
               vex_printf("first_opcode == 0xDF\n");
            case 0xC0: /* FFREEP %st(0) */
               DIP("ffreep %%st(%d)\n", 0);
               put_ST_TAG ( 0, mkU8(0) );

            case 0xE0: /* FNSTSW %ax */
               DIP("fnstsw %%ax\n");
               /* Invent a plausible-looking FPU status word value:
                     ((ftop & 7) << 11) | (c3210 & 0x4700)
               */
                            binop(Iop_And32, get_ftop(), mkU32(7)),
                            unop(Iop_64to32, get_C3210()),

            case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
               fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );

            case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
               /* not really right since COMIP != UCOMIP */
               fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
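            /* Worked example of the FNSTSW %ax formula above (illustrative
               values only, not from the original code): with ftop == 5 and
               c3210 == 0x0100 (just C0 set),
                  ((5 & 7) << 11) | (0x0100 & 0x4700) == 0x2800 | 0x0100
                                                      == 0x2900,
               i.e. TOP lands in bits 13..11 and the condition-code bits stay
               in their usual status-word positions. */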
/*------------------------------------------------------------*/
/*--- MMX INSTRUCTIONS                                     ---*/
/*------------------------------------------------------------*/

/* Effect of MMX insns on x87 FPU state (table 11-2 of
   IA32 arch manual, volume 3):

   Read from, or write to MMX register (viz, any insn except EMMS):
   * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
   * FP stack pointer set to zero

   EMMS:
   * All tags set to Invalid (empty) -- FPTAGS[i] := zero
   * FP stack pointer set to zero
*/

static void do_MMX_preamble ( void )
{
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   IRExpr*     zero  = mkU32(0);
   IRExpr*     tag1  = mkU8(1);

   for (i = 0; i < 8; i++)
      stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
}

static void do_EMMS_preamble ( void )
{
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   IRExpr*     zero  = mkU32(0);
   IRExpr*     tag0  = mkU8(0);

   for (i = 0; i < 8; i++)
      stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
}
static IRExpr* getMMXReg ( UInt archreg )
{
   vassert(archreg < 8);
   return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
}

static void putMMXReg ( UInt archreg, IRExpr* e )
{
   vassert(archreg < 8);
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
}
/* Helper for non-shift MMX insns.  Note this is incomplete in the
   sense that it does not first call do_MMX_preamble() -- that is the
   responsibility of its caller. */

ULong dis_MMXop_regmem_to_reg ( const VexAbiInfo* vbi,
                                Bool show_granularity )
{
   UChar   modrm = getUChar(delta);
   Bool    isReg = epartIsReg(modrm);
   IRExpr* argL  = NULL;
   IRExpr* argR  = NULL;
   IRExpr* argG  = NULL;
   IRExpr* argE  = NULL;
   IRTemp  res   = newTemp(Ity_I64);

   IROp op = Iop_INVALID;

   const HChar* hName = NULL;

#  define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)

      /* Original MMX ones */
      case 0xFC: op = Iop_Add8x8; break;
      case 0xFD: op = Iop_Add16x4; break;
      case 0xFE: op = Iop_Add32x2; break;

      case 0xEC: op = Iop_QAdd8Sx8; break;
      case 0xED: op = Iop_QAdd16Sx4; break;

      case 0xDC: op = Iop_QAdd8Ux8; break;
      case 0xDD: op = Iop_QAdd16Ux4; break;

      case 0xF8: op = Iop_Sub8x8;  break;
      case 0xF9: op = Iop_Sub16x4; break;
      case 0xFA: op = Iop_Sub32x2; break;

      case 0xE8: op = Iop_QSub8Sx8; break;
      case 0xE9: op = Iop_QSub16Sx4; break;

      case 0xD8: op = Iop_QSub8Ux8; break;
      case 0xD9: op = Iop_QSub16Ux4; break;

      case 0xE5: op = Iop_MulHi16Sx4; break;
      case 0xD5: op = Iop_Mul16x4; break;
      case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;

      case 0x74: op = Iop_CmpEQ8x8; break;
      case 0x75: op = Iop_CmpEQ16x4; break;
      case 0x76: op = Iop_CmpEQ32x2; break;

      case 0x64: op = Iop_CmpGT8Sx8; break;
      case 0x65: op = Iop_CmpGT16Sx4; break;
      case 0x66: op = Iop_CmpGT32Sx2; break;

      case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
      case 0x63: op = Iop_QNarrowBin16Sto8Sx8;  eLeft = True; break;
      case 0x67: op = Iop_QNarrowBin16Sto8Ux8;  eLeft = True; break;

      case 0x68: op = Iop_InterleaveHI8x8;  eLeft = True; break;
      case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
      case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;

      case 0x60: op = Iop_InterleaveLO8x8;  eLeft = True; break;
      case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
      case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;

      case 0xDB: op = Iop_And64; break;
      case 0xDF: op = Iop_And64; invG = True; break;
      case 0xEB: op = Iop_Or64; break;
      case 0xEF: /* Possibly do better here if argL and argR are the
                    same reg. */
                 op = Iop_Xor64; break;

      /* Introduced in SSE1 */
      case 0xE0: op = Iop_Avg8Ux8; break;
      case 0xE3: op = Iop_Avg16Ux4; break;
      case 0xEE: op = Iop_Max16Sx4; break;
      case 0xDE: op = Iop_Max8Ux8; break;
      case 0xEA: op = Iop_Min16Sx4; break;
      case 0xDA: op = Iop_Min8Ux8; break;
      case 0xE4: op = Iop_MulHi16Ux4; break;
      case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;

      /* Introduced in SSE2 */
      case 0xD4: op = Iop_Add64; break;
      case 0xFB: op = Iop_Sub64; break;

         vex_printf("\n0x%x\n", (UInt)opc);
         vpanic("dis_MMXop_regmem_to_reg");

   argG = getMMXReg(gregLO3ofRM(modrm));
      argG = unop(Iop_Not64, argG);

      argE = getMMXReg(eregLO3ofRM(modrm));

      IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
      argE = loadLE(Ity_I64, mkexpr(addr));

   if (op != Iop_INVALID) {
      vassert(hName == NULL);
      vassert(hAddr == NULL);
      assign(res, binop(op, argL, argR));

      vassert(hName != NULL);
      vassert(hAddr != NULL);
               0/*regparms*/, hName, hAddr,
               mkIRExprVec_2( argL, argR )

   putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );

   DIP("%s%s %s, %s\n",
       name, show_granularity ? nameMMXGran(opc & 3) : "",
       ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ),
       nameMMXReg(gregLO3ofRM(modrm)) );
/* Vector by scalar shift of G by the amount specified at the bottom
   of E.  This is a straight copy of dis_SSE_shiftG_byE. */

static ULong dis_MMX_shiftG_byE ( const VexAbiInfo* vbi,
                                  Prefix pfx, Long delta,
                                  const HChar* opname, IROp op )
{
   UChar  rm   = getUChar(delta);
   IRTemp g0   = newTemp(Ity_I64);
   IRTemp g1   = newTemp(Ity_I64);
   IRTemp amt  = newTemp(Ity_I64);
   IRTemp amt8 = newTemp(Ity_I8);

   if (epartIsReg(rm)) {
      assign( amt, getMMXReg(eregLO3ofRM(rm)) );
      DIP("%s %s,%s\n", opname,
                        nameMMXReg(eregLO3ofRM(rm)),
                        nameMMXReg(gregLO3ofRM(rm)) );

      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
      DIP("%s %s,%s\n", opname,
                        nameMMXReg(gregLO3ofRM(rm)) );

   assign( g0,   getMMXReg(gregLO3ofRM(rm)) );
   assign( amt8, unop(Iop_64to8, mkexpr(amt)) );

   shl = shr = sar = False;

      case Iop_ShlN16x4: shl = True; size = 32; break;
      case Iop_ShlN32x2: shl = True; size = 32; break;
      case Iop_Shl64:    shl = True; size = 64; break;
      case Iop_ShrN16x4: shr = True; size = 16; break;
      case Iop_ShrN32x2: shr = True; size = 32; break;
      case Iop_Shr64:    shr = True; size = 64; break;
      case Iop_SarN16x4: sar = True; size = 16; break;
      case Iop_SarN32x2: sar = True; size = 32; break;
      default: vassert(0);

            binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
            binop(op, mkexpr(g0), mkexpr(amt8)),

            binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
            binop(op, mkexpr(g0), mkexpr(amt8)),
            binop(op, mkexpr(g0), mkU8(size-1))

   putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
/* Vector by scalar shift of E by an immediate byte.  This is a
   straight copy of dis_SSE_shiftE_imm. */

ULong dis_MMX_shiftE_imm ( Long delta, const HChar* opname, IROp op )
{
   UChar  rm = getUChar(delta);
   IRTemp e0 = newTemp(Ity_I64);
   IRTemp e1 = newTemp(Ity_I64);

   vassert(epartIsReg(rm));
   vassert(gregLO3ofRM(rm) == 2
           || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
   amt = getUChar(delta+1);

   DIP("%s $%d,%s\n", opname,
                      nameMMXReg(eregLO3ofRM(rm)) );

   assign( e0, getMMXReg(eregLO3ofRM(rm)) );

   shl = shr = sar = False;

      case Iop_ShlN16x4: shl = True; size = 16; break;
      case Iop_ShlN32x2: shl = True; size = 32; break;
      case Iop_Shl64:    shl = True; size = 64; break;
      case Iop_SarN16x4: sar = True; size = 16; break;
      case Iop_SarN32x2: sar = True; size = 32; break;
      case Iop_ShrN16x4: shr = True; size = 16; break;
      case Iop_ShrN32x2: shr = True; size = 32; break;
      case Iop_Shr64:    shr = True; size = 64; break;
      default: vassert(0);

      assign( e1, amt >= size
                     : binop(op, mkexpr(e0), mkU8(amt))

      assign( e1, amt >= size
                     ? binop(op, mkexpr(e0), mkU8(size-1))
                     : binop(op, mkexpr(e0), mkU8(amt))

   putMMXReg( eregLO3ofRM(rm), mkexpr(e1) );
/* Completely handle all MMX instructions except emms. */

ULong dis_MMX ( Bool* decode_ok,
                const VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta )
{
   UChar opc = getUChar(delta);

   /* dis_MMX handles all insns except emms. */

         /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
                       binop( Iop_32HLto64,
                              getIReg32(eregOfRexRM(pfx,modrm)) ) );
            DIP("movd %s, %s\n",
                nameIReg32(eregOfRexRM(pfx,modrm)),
                nameMMXReg(gregLO3ofRM(modrm)));

            IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
                       binop( Iop_32HLto64,
                              loadLE(Ity_I32, mkexpr(addr)) ) );
            DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));

         /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putMMXReg( gregLO3ofRM(modrm),
                       getIReg64(eregOfRexRM(pfx,modrm)) );
            DIP("movd %s, %s\n",
                nameIReg64(eregOfRexRM(pfx,modrm)),
                nameMMXReg(gregLO3ofRM(modrm)));

            IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
            putMMXReg( gregLO3ofRM(modrm),
                       loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));

         goto mmx_decode_failure;

         /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putIReg32( eregOfRexRM(pfx,modrm),
                       unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
            DIP("movd %s, %s\n",
                nameMMXReg(gregLO3ofRM(modrm)),
                nameIReg32(eregOfRexRM(pfx,modrm)));

            IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr),
                     unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
            DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);

         /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putIReg64( eregOfRexRM(pfx,modrm),
                       getMMXReg(gregLO3ofRM(modrm)) );
            DIP("movd %s, %s\n",
                nameMMXReg(gregLO3ofRM(modrm)),
                nameIReg64(eregOfRexRM(pfx,modrm)));

            IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr),
                     getMMXReg(gregLO3ofRM(modrm)) );
            DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);

         goto mmx_decode_failure;

         /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
             && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
            goto mmx_decode_failure;
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) );
            DIP("movq %s, %s\n",
                nameMMXReg(eregLO3ofRM(modrm)),
                nameMMXReg(gregLO3ofRM(modrm)));

            IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
            putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movq %s, %s\n",
                dis_buf, nameMMXReg(gregLO3ofRM(modrm)));

         /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
             && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
            goto mmx_decode_failure;
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putMMXReg( eregLO3ofRM(modrm), getMMXReg(gregLO3ofRM(modrm)) );
            DIP("movq %s, %s\n",
                nameMMXReg(gregLO3ofRM(modrm)),
                nameMMXReg(eregLO3ofRM(modrm)));

            IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
            DIP("mov(nt)q %s, %s\n",
                nameMMXReg(gregLO3ofRM(modrm)), dis_buf);

      case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True );

      case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
             && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True );

      case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True );

      case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True );

      case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True );

      case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True );

      case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False );

      case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False );

      case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False );

      case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True );

      case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True );

      case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False );

      case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False );

      case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packuswb", False );

      case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
             && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True );

      case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
             && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True );

      case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False );

      case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False );

      case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False );

      case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
            goto mmx_decode_failure;
         delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False );

#     define SHIFT_BY_REG(_name,_op)                                     \
         delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op);        \

      /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
      case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
      case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);

      /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
      case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
      case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);

      /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
      case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);

#     undef SHIFT_BY_REG

         /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
         UChar byte2, subopc;
            goto mmx_decode_failure;
         byte2  = getUChar(delta);           /* amode / sub-opcode */
         subopc = toUChar( (byte2 >> 3) & 7 );

#        define SHIFT_BY_IMM(_name,_op)                          \
            do { delta = dis_MMX_shiftE_imm(delta,_name,_op);    \

              if (subopc == 2 /*SRL*/ && opc == 0x71)
                  SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
         else if (subopc == 2 /*SRL*/ && opc == 0x72)
                  SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
         else if (subopc == 2 /*SRL*/ && opc == 0x73)
                  SHIFT_BY_IMM("psrlq", Iop_Shr64);

         else if (subopc == 4 /*SAR*/ && opc == 0x71)
                  SHIFT_BY_IMM("psraw", Iop_SarN16x4);
         else if (subopc == 4 /*SAR*/ && opc == 0x72)
                  SHIFT_BY_IMM("psrad", Iop_SarN32x2);

         else if (subopc == 6 /*SHL*/ && opc == 0x71)
                  SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
         else if (subopc == 6 /*SHL*/ && opc == 0x72)
                  SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
         else if (subopc == 6 /*SHL*/ && opc == 0x73)
                  SHIFT_BY_IMM("psllq", Iop_Shl64);

         else goto mmx_decode_failure;

#        undef SHIFT_BY_IMM

         IRTemp addr    = newTemp(Ity_I64);
         IRTemp regD    = newTemp(Ity_I64);
         IRTemp regM    = newTemp(Ity_I64);
         IRTemp mask    = newTemp(Ity_I64);
         IRTemp olddata = newTemp(Ity_I64);
         IRTemp newdata = newTemp(Ity_I64);

         modrm = getUChar(delta);
         if (sz != 4 || (!epartIsReg(modrm)))
            goto mmx_decode_failure;

         assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
         assign( regM, getMMXReg( eregLO3ofRM(modrm) ));
         assign( regD, getMMXReg( gregLO3ofRM(modrm) ));
         assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
         assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
                                 unop(Iop_Not64, mkexpr(mask)))) );
         storeLE( mkexpr(addr), mkexpr(newdata) );
         DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ),
                                 nameMMXReg( gregLO3ofRM(modrm) ) );

   /* --- MMX decode failure --- */
   return delta; /* ignored */
/*------------------------------------------------------------*/
/*--- More misc arithmetic and other obscure insns.        ---*/
/*------------------------------------------------------------*/

/* Generate base << amt with vacated places filled with stuff
   from xtra.  amt guaranteed in 0 .. 63. */
IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt )
{
   /* else (base << amt) | (xtra >>u (64-amt)) */
         binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
            binop(Iop_Shl64, mkexpr(base), mkexpr(amt)),
            binop(Iop_Shr64, mkexpr(xtra),
                  binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
}

/* Generate base >>u amt with vacated places filled with stuff
   from xtra.  amt guaranteed in 0 .. 63. */
IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt )
{
   /* else (base >>u amt) | (xtra << (64-amt)) */
         binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
            binop(Iop_Shr64, mkexpr(base), mkexpr(amt)),
            binop(Iop_Shl64, mkexpr(xtra),
                  binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
}
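/* Worked example for the two helpers above (illustrative values only, not
   from the original code): with base == 0x00000000FFFFFFFF,
   xtra == 0xAB00000000000000 and amt == 8, shiftL64_with_extras yields
      (base << 8) | (xtra >>u 56) == 0x000000FFFFFFFF00 | 0xAB
                                  == 0x000000FFFFFFFFAB,
   i.e. the bits vacated at the bottom of base are refilled from the top of
   xtra, which is exactly the behaviour SHLD/SHRD need. */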
/* Double length left and right shifts.  Apparently only required in
   v-size (no b- variant). */
ULong dis_SHLRD_Gv_Ev ( const VexAbiInfo* vbi,
                        Long delta, UChar modrm,
                        Bool amt_is_literal,
                        const HChar* shift_amt_txt,
{
   /* shift_amt :: Ity_I8 is the amount to shift.  shift_amt_txt is used
      for printing it.  And eip on entry points at the modrm byte. */

   IRType ty    = szToITy(sz);
   IRTemp gsrc  = newTemp(ty);
   IRTemp esrc  = newTemp(ty);
   IRTemp addr  = IRTemp_INVALID;
   IRTemp tmpSH = newTemp(Ity_I8);
   IRTemp tmpSS = newTemp(Ity_I8);
   IRTemp tmp64 = IRTemp_INVALID;
   IRTemp res64 = IRTemp_INVALID;
   IRTemp rss64 = IRTemp_INVALID;
   IRTemp resTy = IRTemp_INVALID;
   IRTemp rssTy = IRTemp_INVALID;
   Int    mask  = sz==8 ? 63 : 31;

   vassert(sz == 2 || sz == 4 || sz == 8);

   /* The E-part is the destination; this is shifted.  The G-part
      supplies bits to be shifted into the E-part, but is not

      If shifting left, form a double-length word with E at the top
      and G at the bottom, and shift this left.  The result is then in

      If shifting right, form a double-length word with G at the top
      and E at the bottom, and shift this right.  The result is then
   */

   /* Fetch the operands. */

   assign( gsrc, getIRegG(sz, pfx, modrm) );

   if (epartIsReg(modrm)) {
      assign( esrc, getIRegE(sz, pfx, modrm) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm));

      addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
                        /* # bytes following amode */
                        amt_is_literal ? 1 : 0 );
      assign( esrc, loadLE(ty, mkexpr(addr)) );
      DIP("sh%cd%c %s, %s, %s\n",
          ( left_shift ? 'l' : 'r' ), nameISize(sz),
          nameIRegG(sz, pfx, modrm), dis_buf);

   /* Calculate the masked shift amount (tmpSH), the masked subshift
      amount (tmpSS), the shifted value (res64) and the subshifted
   */
   assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) );
   assign( tmpSS, binop(Iop_And8,
                        binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),

   tmp64 = newTemp(Ity_I64);
   res64 = newTemp(Ity_I64);
   rss64 = newTemp(Ity_I64);

   if (sz == 2 || sz == 4) {

      /* G is xtra; E is data */
      /* what a freaking nightmare: */
      if (sz == 4 && left_shift) {
         assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) );
                        binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
                        binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)),

      if (sz == 4 && !left_shift) {
         assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) );
         assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
         assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) );

      if (sz == 2 && left_shift) {
                        binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)),
                        binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc))
         /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
                        binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
         /* subshift formed by shifting [esrc'0000'0000'0000] */
                        binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)),

      if (sz == 2 && !left_shift) {
                        binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)),
                        binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc))
         /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
         assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
         /* subshift formed by shifting [0000'0000'0000'esrc] */
         assign( rss64, binop(Iop_Shr64,
                              unop(Iop_16Uto64, mkexpr(esrc)),

         assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH ));
         assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS ));

         assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH ));
         assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS ));

   resTy = newTemp(ty);
   rssTy = newTemp(ty);
   assign( resTy, narrowTo(ty, mkexpr(res64)) );
   assign( rssTy, narrowTo(ty, mkexpr(rss64)) );

   /* Put result back and write the flags thunk. */
   setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64,
                              resTy, rssTy, ty, tmpSH );

   if (epartIsReg(modrm)) {
      putIRegE(sz, pfx, modrm, mkexpr(resTy));

      storeLE( mkexpr(addr), mkexpr(resTy) );

   if (amt_is_literal) delta++;
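/* Illustrative example of the sz == 4, left_shift case above (values are
   made up): for "shldl $8, %ebx, %eax" with EAX == 0x11223344 (the E
   operand) and EBX == 0xAABBCCDD (the G operand), tmp64 is formed as
   [E:G] == 0x11223344AABBCCDD; shifting it left by 8 and keeping the top
   32 bits gives 0x223344AA, so E's bits leave at the top while G's top
   bits stream in at the bottom, as the comments above describe. */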
/* Handle BT/BTS/BTR/BTC Gv, Ev.  Apparently b-size is not
   required. */

typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;

static const HChar* nameBtOp ( BtOp op )
{
   switch (op) {
      case BtOpNone:  return "";
      case BtOpSet:   return "s";
      case BtOpReset: return "r";
      case BtOpComp:  return "c";
      default: vpanic("nameBtOp(amd64)");
   }
}
ULong dis_bt_G_E ( const VexAbiInfo* vbi,
                   Prefix pfx, Int sz, Long delta, BtOp op,
                   /*OUT*/Bool* decode_OK )
{
   IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
          t_addr1, t_rsp, t_mask, t_new;

   vassert(sz == 2 || sz == 4 || sz == 8);

   t_fetched = t_bitno0 = t_bitno1 = t_bitno2
             = t_addr0 = t_addr1 = t_rsp
             = t_mask = t_new = IRTemp_INVALID;

   t_fetched = newTemp(Ity_I8);
   t_new     = newTemp(Ity_I8);
   t_bitno0  = newTemp(Ity_I64);
   t_bitno1  = newTemp(Ity_I64);
   t_bitno2  = newTemp(Ity_I8);
   t_addr1   = newTemp(Ity_I64);
   modrm     = getUChar(delta);

   if (epartIsReg(modrm)) {
      /* F2 and F3 are never acceptable. */
      if (haveF2orF3(pfx)) {

      /* F2 or F3 (but not both) are allowed, provided LOCK is also
         present, and only for the BTC/BTS/BTR cases (not BT). */
      if (haveF2orF3(pfx)) {
         if (haveF2andF3(pfx) || !haveLOCK(pfx) || op == BtOpNone) {

   assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) );

   if (epartIsReg(modrm)) {
      /* Get it onto the client's stack.  Oh, this is a horrible
         kludge.  See https://bugs.kde.org/show_bug.cgi?id=245925.
         Because of the ELF ABI stack redzone, there may be live data
         up to 128 bytes below %RSP.  So we can't just push it on the
         stack, else we may wind up trashing live data, and causing
         impossible-to-find simulation errors.  (Yes, this did
         happen.)  So we need to drop RSP before at least 128 before
         pushing it.  That unfortunately means hitting Memcheck's
         fast-case painting code.  Ideally we should drop more than
         128, to reduce the chances of breaking buggy programs that
         have live data below -128(%RSP).  Memcheck fast-cases moves
         of 288 bytes due to the need to handle ppc64-linux quickly,
         so let's use 288.  Of course the real fix is to get rid of
         this kludge entirely. */
      t_rsp   = newTemp(Ity_I64);
      t_addr0 = newTemp(Ity_I64);

      vassert(vbi->guest_stack_redzone_size == 128);
      assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) );
      putIReg64(R_RSP, mkexpr(t_rsp));

      storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) );

      /* Make t_addr0 point at it. */
      assign( t_addr0, mkexpr(t_rsp) );

      /* Mask out upper bits of the shift amount, since we're doing a */
      assign( t_bitno1, binop(Iop_And64,
                              mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) );

      t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
      assign( t_bitno1, mkexpr(t_bitno0) );

   /* At this point: t_addr0 is the address being operated on.  If it
      was a reg, we will have pushed it onto the client's stack.
      t_bitno1 is the bit number, suitably masked in the case of a
   */

   /* Now the main sequence. */
                  binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) );

   /* t_addr1 now holds effective address */

                  binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) );

   /* t_bitno2 contains offset of bit within byte */

   if (op != BtOpNone) {
      t_mask = newTemp(Ity_I8);
      assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );

   /* t_mask is now a suitable byte mask */

   assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );

   if (op != BtOpNone) {
              binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
              binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
              binop(Iop_And8, mkexpr(t_fetched),
                              unop(Iop_Not8, mkexpr(t_mask))) );
         vpanic("dis_bt_G_E(amd64)");

      if ((haveLOCK(pfx)) && !epartIsReg(modrm)) {
         casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
                mkexpr(t_new)/*new*/,
                guest_RIP_curr_instr );

         storeLE( mkexpr(t_addr1), mkexpr(t_new) );

   /* Side effect done; now get selected bit into Carry flag.  The Intel docs
      (as of 2015, at least) say that C holds the result, Z is unchanged, and
      O,S,A and P are undefined.  However, on Skylake it appears that O,S,A,P
      are also unchanged, so let's do that. */
   const ULong maskC     = AMD64G_CC_MASK_C;
   const ULong maskOSZAP = AMD64G_CC_MASK_O | AMD64G_CC_MASK_S
                           | AMD64G_CC_MASK_Z | AMD64G_CC_MASK_A

   IRTemp old_rflags = newTemp(Ity_I64);
   assign(old_rflags, mk_amd64g_calculate_rflags_all());

   IRTemp new_rflags = newTemp(Ity_I64);
             binop(Iop_And64, mkexpr(old_rflags), mkU64(maskOSZAP)),
                   unop(Iop_8Uto64, mkexpr(t_fetched)),

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   /* Move reg operand from stack back to reg */
   if (epartIsReg(modrm)) {
      /* t_rsp still points at it. */
      /* only write the reg if actually modifying it; doing otherwise
         zeroes the top half erroneously when doing btl due to
         standard zero-extend rule */
         putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) );
      putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) );

   DIP("bt%s%c %s, %s\n",
       nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm),
       ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) );
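/* Illustrative example of the addressing scheme above (values are made up):
   for a bit number of 43, the code accesses the byte at
   t_addr0 + (43 >> 3) == t_addr0 + 5 and operates on bit 43 & 7 == 3 within
   it, via a plain byte load/modify/store, or via casLE when a LOCK prefix
   is present on a memory operand. */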
/* Handle BSF/BSR.  Only v-size seems necessary. */
ULong dis_bs_E_G ( const VexAbiInfo* vbi,
                   Prefix pfx, Int sz, Long delta, Bool fwds )
{
   IRType ty    = szToITy(sz);
   IRTemp src   = newTemp(ty);
   IRTemp dst   = newTemp(ty);
   IRTemp src64 = newTemp(Ity_I64);
   IRTemp dst64 = newTemp(Ity_I64);
   IRTemp srcB  = newTemp(Ity_I1);

   vassert(sz == 8 || sz == 4 || sz == 2);

   modrm = getUChar(delta);
   isReg = epartIsReg(modrm);
      assign( src, getIRegE(sz, pfx, modrm) );

      IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
      assign( src, loadLE(ty, mkexpr(addr)) );

   DIP("bs%c%c %s, %s\n",
       fwds ? 'f' : 'r', nameISize(sz),
       ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ),
       nameIRegG(sz, pfx, modrm));

   /* First, widen src to 64 bits if it is not already. */
   assign( src64, widenUto64(mkexpr(src)) );

   /* Generate a bool expression which is zero iff the original is
      zero, and nonzero otherwise.  Ask for a CmpNE version which, if
      instrumented by Memcheck, is instrumented expensively, since
      this may be used on the output of a preceding movmskb insn,
      which has been known to be partially defined, and in need of
      careful handling. */
   assign( srcB, binop(Iop_ExpCmpNE64, mkexpr(src64), mkU64(0)) );

   /* Flags: Z is 1 iff source value is zero.  All others
      are undefined -- we force them to zero. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
         IRExpr_ITE( mkexpr(srcB),
                     mkU64(AMD64G_CC_MASK_Z)

   /* Set NDEP even though it isn't used.  This makes redundant-PUT
      elimination of previous stores to this field work better. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   /* Result: iff source value is zero, we can't use
      Iop_Clz64/Iop_Ctz64 as they have no defined result in that case.
      But anyway, amd64 semantics say the result is undefined in
      such situations.  Hence handle the zero case specially. */

   /* Bleh.  What we compute:

         bsf64:  if src == 0 then {dst is unchanged}
                 else Ctz64(src)

         bsr64:  if src == 0 then {dst is unchanged}
                 else 63 - Clz64(src)

         bsf32:  if src == 0 then {dst is unchanged}
                 else Ctz64(32Uto64(src))

         bsr32:  if src == 0 then {dst is unchanged}
                 else 63 - Clz64(32Uto64(src))

         bsf16:  if src == 0 then {dst is unchanged}
                 else Ctz64(32Uto64(16Uto32(src)))

         bsr16:  if src == 0 then {dst is unchanged}
                 else 63 - Clz64(32Uto64(16Uto32(src)))
   */

   /* The main computation, guarding against zero. */
               fwds ? unop(Iop_Ctz64, mkexpr(src64))
                      unop(Iop_Clz64, mkexpr(src64))),
               /* src == 0 -- leave dst unchanged */
               widenUto64( getIRegG( sz, pfx, modrm ) )

      assign( dst, unop(Iop_64to16, mkexpr(dst64)) );
      assign( dst, unop(Iop_64to32, mkexpr(dst64)) );
      assign( dst, mkexpr(dst64) );

   /* dump result back */
   putIRegG( sz, pfx, modrm, mkexpr(dst) );
/* swap rAX with the reg specified by reg and REX.B */
void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 )
{
   IRType ty = szToITy(sz);
   IRTemp t1 = newTemp(ty);
   IRTemp t2 = newTemp(ty);
   vassert(sz == 2 || sz == 4 || sz == 8);
   vassert(regLo3 < 8);
   if (sz == 8) {
      assign( t1, getIReg64(R_RAX) );
      assign( t2, getIRegRexB(8, pfx, regLo3) );
      putIReg64( R_RAX, mkexpr(t2) );
      putIRegRexB(8, pfx, regLo3, mkexpr(t1) );
   } else if (sz == 4) {
      assign( t1, getIReg32(R_RAX) );
      assign( t2, getIRegRexB(4, pfx, regLo3) );
      putIReg32( R_RAX, mkexpr(t2) );
      putIRegRexB(4, pfx, regLo3, mkexpr(t1) );
   } else {
      assign( t1, getIReg16(R_RAX) );
      assign( t2, getIRegRexB(2, pfx, regLo3) );
      putIReg16( R_RAX, mkexpr(t2) );
      putIRegRexB(2, pfx, regLo3, mkexpr(t1) );
   }
   DIP("xchg%c %s, %s\n",
       nameISize(sz), nameIRegRAX(sz),
       nameIRegRexB(sz,pfx, regLo3));
}
void codegen_SAHF ( void )
{
   /* Set the flags to:
      (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O)
                                      -- retain the old O flag
      | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
                |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C)
   */
   ULong  mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
                       |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
   IRTemp oldflags   = newTemp(Ity_I64);
   assign( oldflags, mk_amd64g_calculate_rflags_all() );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
            binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)),
               binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)),
void codegen_LAHF ( void )
{
   /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
   IRExpr* rax_with_hole;

   ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
                      |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;

   IRTemp flags = newTemp(Ity_I64);
   assign( flags, mk_amd64g_calculate_rflags_all() );

   rax_with_hole
      = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL));
   new_byte
      = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)),
   new_rax
      = binop(Iop_Or64, rax_with_hole,
              binop(Iop_Shl64, new_byte, mkU8(8)));
   putIReg64(R_RAX, new_rax);
}
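/* Illustrative note (assuming the usual rflags bit assignments: SF=bit 7,
   ZF=bit 6, AF=bit 4, PF=bit 2, CF=bit 0): mask_SZACP then works out to
   0xD5, so LAHF copies exactly those five flags into AH and SAHF writes
   them back while leaving the O flag untouched, as the comments above
   state. */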
ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok,
                        const VexAbiInfo* vbi,
   IRType ty    = szToITy(size);
   IRTemp acc   = newTemp(ty);
   IRTemp src   = newTemp(ty);
   IRTemp dest  = newTemp(ty);
   IRTemp dest2 = newTemp(ty);
   IRTemp acc2  = newTemp(ty);
   IRTemp cond  = newTemp(Ity_I1);
   IRTemp addr  = IRTemp_INVALID;
   UChar  rm    = getUChar(delta0);

   /* There are 3 cases to consider:

      reg-reg: ignore any lock prefix, generate sequence based

      reg-mem, not locked: ignore any lock prefix, generate sequence

      reg-mem, locked: use IRCAS
   */

   /* Decide whether F2 or F3 are acceptable.  Never for register
      case, but for the memory case, one or the other is OK provided
      LOCK is also present. */
   if (epartIsReg(rm)) {
      if (haveF2orF3(pfx)) {

      if (haveF2orF3(pfx)) {
         if (haveF2andF3(pfx) || !haveLOCK(pfx)) {

   if (epartIsReg(rm)) {
      assign( dest, getIRegE(size, pfx, rm) );
      assign( src,  getIRegG(size, pfx, rm) );
      assign( acc,  getIRegRAX(size) );
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
      assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
      assign( acc2,  IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIRegRAX(size, mkexpr(acc2));
      putIRegE(size, pfx, rm, mkexpr(dest2));
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIRegG(size,pfx,rm),
                               nameIRegE(size,pfx,rm) );
   }
   else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( dest, loadLE(ty, mkexpr(addr)) );
      assign( src,  getIRegG(size, pfx, rm) );
      assign( acc,  getIRegRAX(size) );
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
      assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
      assign( acc2,  IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIRegRAX(size, mkexpr(acc2));
      storeLE( mkexpr(addr), mkexpr(dest2) );
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIRegG(size,pfx,rm), dis_buf);
   }
   else if (!epartIsReg(rm) && haveLOCK(pfx)) {
      /* src is new value.  acc is expected value.  dest is old value.
         Compute success from the output of the IRCAS, and steer the
         new value for RAX accordingly: in case of success, RAX is
      */
      addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( src, getIRegG(size, pfx, rm) );
      assign( acc, getIRegRAX(size) );
         mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
                  NULL, mkexpr(acc), NULL, mkexpr(src) )
      setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
      assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
      assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
      putIRegRAX(size, mkexpr(acc2));
      DIP("cmpxchg%c %s,%s\n", nameISize(size),
                               nameIRegG(size,pfx,rm), dis_buf);
   }
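/* Informal summary of the three cases implemented above: cmpxchg compares
   RAX against the destination; if they are equal, Z is set and the
   destination receives the G register, otherwise Z is clear and RAX
   receives the destination's old value.  Only the locked memory form needs
   an IRCAS; the other two are plain read/compare/write sequences. */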
/* Handle conditional move instructions of the form
      cmovcc E(reg-or-mem), G(reg)

   E(src) is reg-or-mem

   If E is reg, -->    GET %E, tmps

   If E is mem  -->    (getAddr E) -> tmpa
*/
ULong dis_cmov_E_G ( const VexAbiInfo* vbi,
   UChar rm = getUChar(delta0);

   IRType ty   = szToITy(sz);
   IRTemp tmps = newTemp(ty);
   IRTemp tmpd = newTemp(ty);

   if (epartIsReg(rm)) {
      assign( tmps, getIRegE(sz, pfx, rm) );
      assign( tmpd, getIRegG(sz, pfx, rm) );

      putIRegG( sz, pfx, rm,
                IRExpr_ITE( mk_amd64g_calculate_condition(cond),

      DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
                            nameIRegE(sz,pfx,rm),
                            nameIRegG(sz,pfx,rm));

   /* E refers to memory */
      IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( tmps, loadLE(ty, mkexpr(addr)) );
      assign( tmpd, getIRegG(sz, pfx, rm) );

      putIRegG( sz, pfx, rm,
                IRExpr_ITE( mk_amd64g_calculate_condition(cond),

      DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
                            nameIRegG(sz,pfx,rm));
ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok,
                     const VexAbiInfo* vbi,
                     Prefix pfx, Int sz, Long delta0 )
{
   UChar rm = getUChar(delta0);

   IRType ty    = szToITy(sz);
   IRTemp tmpd  = newTemp(ty);
   IRTemp tmpt0 = newTemp(ty);
   IRTemp tmpt1 = newTemp(ty);

   /* There are 3 cases to consider:

      reg-reg: ignore any lock prefix,
               generate 'naive' (non-atomic) sequence

      reg-mem, not locked: ignore any lock prefix, generate 'naive'
                           (non-atomic) sequence

      reg-mem, locked: use IRCAS
   */

   if (epartIsReg(rm)) {
      assign( tmpd,  getIRegE(sz, pfx, rm) );
      assign( tmpt0, getIRegG(sz, pfx, rm) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIRegG(sz, pfx, rm, mkexpr(tmpd));
      putIRegE(sz, pfx, rm, mkexpr(tmpt1));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIRegG(sz,pfx,rm), nameIRegE(sz,pfx,rm));
   }
   else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
      IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( tmpd,  loadLE(ty, mkexpr(addr)) );
      assign( tmpt0, getIRegG(sz, pfx, rm) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      storeLE( mkexpr(addr), mkexpr(tmpt1) );
      putIRegG(sz, pfx, rm, mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
   }
   else if (!epartIsReg(rm) && haveLOCK(pfx)) {
      IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
      assign( tmpd,  loadLE(ty, mkexpr(addr)) );
      assign( tmpt0, getIRegG(sz, pfx, rm) );
      assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
                           mkexpr(tmpd), mkexpr(tmpt0)) );
      casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
             mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr );
      setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
      putIRegG(sz, pfx, rm, mkexpr(tmpd));
      DIP("xadd%c %s, %s\n",
          nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
   }
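/* Informal summary of xadd as implemented above: the destination's old
   value is returned in the G register while the destination receives
   old + G, with flags set as for an ADD.  Only the locked memory form goes
   through casLE; the other two cases are non-atomic. */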
//.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
//..
//.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 )
//..    UChar rm  = getUChar(delta0);
//..    HChar dis_buf[50];
//..
//..    if (epartIsReg(rm)) {
//..       putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
//..       DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
//..       return 1+delta0;
//..       addr = disAMode ( &len, sorb, delta0, dis_buf );
//..       putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
//..       DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
//..       return len+delta0;
//..
//.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem).  If
//..    dst is ireg and sz==4, zero out top half of it. */
//..
//.. UInt dis_mov_Sw_Ew ( UChar sorb,
//..    UChar rm  = getUChar(delta0);
//..    HChar dis_buf[50];
//..
//..    vassert(sz == 2 || sz == 4);
//..
//..    if (epartIsReg(rm)) {
//..       putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
//..       putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
//..       DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
//..       return 1+delta0;
//..       addr = disAMode ( &len, sorb, delta0, dis_buf );
//..       storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
//..       DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
//..       return len+delta0;
/* Handle move instructions of the form
      mov sreg, reg-or-mem
   Is passed a ptr to the modRM byte, and the data size.  Returns
   the address advanced completely over this instruction.

   VEX does not currently simulate segment registers on AMD64 which means that
   instead of moving a value of a segment register, zero is moved to the
   destination.  The zero value represents a null (unused) selector.  This is
   not correct (especially for the %cs, %fs and %gs registers) but it seems to
   provide a sufficient simulation for currently seen programs that use this
   instruction.  If some program actually decides to use the obtained segment
   selector for something meaningful then the zero value should be a clear
   indicator that there is some problem.

   E(dst) is reg-or-mem

   If E is reg, -->    PUT $0, %E

   If E is mem, -->    (getAddr E) -> tmpa
*/
ULong dis_mov_S_E ( const VexAbiInfo* vbi,
   UChar rm = getUChar(delta0);

   if (epartIsReg(rm)) {
      putIRegE(size, pfx, rm, mkU(szToITy(size), 0));
      DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx, rm)),
                         nameIRegE(size, pfx, rm));

   /* E refers to memory */
      IRTemp addr = disAMode(&len, vbi, pfx, delta0, dis_buf, 0);
      storeLE(mkexpr(addr), mkU16(0));
      DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx, rm)),
//.. void dis_push_segreg ( UInt sreg, Int sz )
//..    IRTemp t1 = newTemp(Ity_I16);
//..    IRTemp ta = newTemp(Ity_I32);
//..    vassert(sz == 2 || sz == 4);
//..
//..    assign( t1, getSReg(sreg) );
//..    assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
//..    putIReg(4, R_ESP, mkexpr(ta));
//..    storeLE( mkexpr(ta), mkexpr(t1) );
//..
//..    DIP("pushw %s\n", nameSReg(sreg));
//..
//.. void dis_pop_segreg ( UInt sreg, Int sz )
//..    IRTemp t1 = newTemp(Ity_I16);
//..    IRTemp ta = newTemp(Ity_I32);
//..    vassert(sz == 2 || sz == 4);
//..
//..    assign( ta, getIReg(4, R_ESP) );
//..    assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
//..
//..    putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
//..    putSReg( sreg, mkexpr(t1) );
//..    DIP("pop %s\n", nameSReg(sreg));
void dis_ret ( /*MOD*/DisResult* dres, const VexAbiInfo* vbi, ULong d64 )
{
   IRTemp t1 = newTemp(Ity_I64);
   IRTemp t2 = newTemp(Ity_I64);
   IRTemp t3 = newTemp(Ity_I64);
   assign(t1, getIReg64(R_RSP));
   assign(t2, loadLE(Ity_I64,mkexpr(t1)));
   assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64)));
   putIReg64(R_RSP, mkexpr(t3));
   make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret");
   jmp_treg(dres, Ijk_Ret, t2);
   vassert(dres->whatNext == Dis_StopHere);
}
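/* Informal summary of the sequence above: for "ret $d64" the 8-byte return
   address is loaded from [RSP], RSP is advanced by 8+d64, an AbiHint marks
   the vacated stack area, and control transfers to the popped address with
   jump kind Ijk_Ret. */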
/*------------------------------------------------------------*/
/*--- SSE/SSE2/SSE3 helpers                                ---*/
/*------------------------------------------------------------*/

/* Indicates whether the op requires a rounding-mode argument.  Note
   that this covers only vector floating point arithmetic ops, and
   omits the scalar ones that need rounding modes.  Note also that
   inconsistencies here will get picked up later by the IR sanity
   checker, so this isn't correctness-critical. */
static Bool requiresRMode ( IROp op )
{
   switch (op) {
      case Iop_Add32Fx4: case Iop_Sub32Fx4:
      case Iop_Mul32Fx4: case Iop_Div32Fx4:
      case Iop_Add64Fx2: case Iop_Sub64Fx2:
      case Iop_Mul64Fx2: case Iop_Div64Fx2:

      case Iop_Add32Fx8: case Iop_Sub32Fx8:
      case Iop_Mul32Fx8: case Iop_Div32Fx8:
      case Iop_Add64Fx4: case Iop_Sub64Fx4:
      case Iop_Mul64Fx4: case Iop_Div64Fx4:
/* Worker function; do not call directly.
   Handles full width G = G `op` E   and   G = (not G) `op` E.
*/

static ULong dis_SSE_E_to_G_all_wrk (
                const VexAbiInfo* vbi,
                Prefix pfx, Long delta,
                const HChar* opname, IROp op,
                Bool   invertG
             )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getUChar(delta);
   Bool    needsRMode = requiresRMode(op);
   IRExpr* gpart
      = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm)))
                : getXMMReg(gregOfRexRM(pfx,rm));
   if (epartIsReg(rm)) {
      putXMMReg(
         gregOfRexRM(pfx,rm),
         needsRMode
            ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        gpart,
                        getXMMReg(eregOfRexRM(pfx,rm)))
            : binop(op, gpart,
                        getXMMReg(eregOfRexRM(pfx,rm)))
      );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRexRM(pfx,rm)),
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      putXMMReg(
         gregOfRexRM(pfx,rm),
         needsRMode
            ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                        gpart,
                        loadLE(Ity_V128, mkexpr(addr)))
            : binop(op, gpart,
                        loadLE(Ity_V128, mkexpr(addr)))
      );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+alen;
   }
}

/* All lanes SSE binary operation, G = G `op` E. */

static
ULong dis_SSE_E_to_G_all ( const VexAbiInfo* vbi,
                           Prefix pfx, Long delta,
                           const HChar* opname, IROp op )
{
   return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False );
}

/* All lanes SSE binary operation, G = (not G) `op` E. */

static
ULong dis_SSE_E_to_G_all_invG ( const VexAbiInfo* vbi,
                                Prefix pfx, Long delta,
                                const HChar* opname, IROp op )
{
   return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True );
}

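
/* Illustrative sketch only (not a quote of the opcode dispatcher): this is
   roughly how the top-level decoder is expected to use the two helpers
   above, e.g. for ADDPS (0F 58) and for ANDNPS (0F 55), which computes
   (not G) AND E.  The opcode numbers and IROp pairings here are the obvious
   ones but should be read as assumptions. */
#if 0
   /* ADDPS xmm/m128, xmm */
   delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addps", Iop_Add32Fx4 );
   /* ANDNPS xmm/m128, xmm */
   delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnps", Iop_AndV128 );
#endif
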
/* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */

static ULong dis_SSE_E_to_G_lo32 ( const VexAbiInfo* vbi,
                                   Prefix pfx, Long delta,
                                   const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getUChar(delta);
   IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
   if (epartIsReg(rm)) {
      putXMMReg( gregOfRexRM(pfx,rm),
                 binop(op, gpart,
                           getXMMReg(eregOfRexRM(pfx,rm))) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRexRM(pfx,rm)),
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+1;
   } else {
      /* We can only do a 32-bit memory read, so the upper 3/4 of the
         E operand needs to be made simply of zeroes. */
      IRTemp epart = newTemp(Ity_V128);
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( epart, unop( Iop_32UtoV128,
                           loadLE(Ity_I32, mkexpr(addr))) );
      putXMMReg( gregOfRexRM(pfx,rm),
                 binop(op, gpart, mkexpr(epart)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+alen;
   }
}

/* Lower 64-bit lane only SSE binary operation, G = G `op` E. */

static ULong dis_SSE_E_to_G_lo64 ( const VexAbiInfo* vbi,
                                   Prefix pfx, Long delta,
                                   const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getUChar(delta);
   IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
   if (epartIsReg(rm)) {
      putXMMReg( gregOfRexRM(pfx,rm),
                 binop(op, gpart,
                           getXMMReg(eregOfRexRM(pfx,rm))) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRexRM(pfx,rm)),
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+1;
   } else {
      /* We can only do a 64-bit memory read, so the upper half of the
         E operand needs to be made simply of zeroes. */
      IRTemp epart = newTemp(Ity_V128);
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( epart, unop( Iop_64UtoV128,
                           loadLE(Ity_I64, mkexpr(addr))) );
      putXMMReg( gregOfRexRM(pfx,rm),
                 binop(op, gpart, mkexpr(epart)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+alen;
   }
}

/* All lanes unary SSE operation, G = op(E). */

static ULong dis_SSE_E_to_G_unary_all (
                const VexAbiInfo* vbi,
                Prefix pfx, Long delta,
                const HChar* opname, IROp op
             )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getUChar(delta);
   // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
   // up in the usual way.
   Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
   if (epartIsReg(rm)) {
      IRExpr* src = getXMMReg(eregOfRexRM(pfx,rm));
      /* XXXROUNDINGFIXME */
      IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
                              : unop(op, src);
      putXMMReg( gregOfRexRM(pfx,rm), res );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRexRM(pfx,rm)),
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      IRExpr* src = loadLE(Ity_V128, mkexpr(addr));
      /* XXXROUNDINGFIXME */
      IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
                              : unop(op, src);
      putXMMReg( gregOfRexRM(pfx,rm), res );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+alen;
   }
}

/* Lowest 32-bit lane only unary SSE operation, G = op(E). */

static ULong dis_SSE_E_to_G_unary_lo32 (
                const VexAbiInfo* vbi,
                Prefix pfx, Long delta,
                const HChar* opname, IROp op
             )
{
   /* First we need to get the old G value and patch the low 32 bits
      of the E operand into it.  Then apply op and write back to G. */
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getUChar(delta);
   IRTemp  oldG0 = newTemp(Ity_V128);
   IRTemp  oldG1 = newTemp(Ity_V128);

   assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );

   if (epartIsReg(rm)) {
      assign( oldG1,
              binop( Iop_SetV128lo32,
                     mkexpr(oldG0),
                     getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) );
      putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRexRM(pfx,rm)),
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( oldG1,
              binop( Iop_SetV128lo32,
                     mkexpr(oldG0),
                     loadLE(Ity_I32, mkexpr(addr)) ));
      putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+alen;
   }
}

/* Lowest 64-bit lane only unary SSE operation, G = op(E). */

static ULong dis_SSE_E_to_G_unary_lo64 (
                const VexAbiInfo* vbi,
                Prefix pfx, Long delta,
                const HChar* opname, IROp op
             )
{
   /* First we need to get the old G value and patch the low 64 bits
      of the E operand into it.  Then apply op and write back to G. */
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getUChar(delta);
   IRTemp  oldG0 = newTemp(Ity_V128);
   IRTemp  oldG1 = newTemp(Ity_V128);

   assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );

   if (epartIsReg(rm)) {
      assign( oldG1,
              binop( Iop_SetV128lo64,
                     mkexpr(oldG0),
                     getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) );
      putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRexRM(pfx,rm)),
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( oldG1,
              binop( Iop_SetV128lo64,
                     mkexpr(oldG0),
                     loadLE(Ity_I64, mkexpr(addr)) ));
      putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      return delta+alen;
   }
}

/* SSE integer binary operation:
      G = G `op` E   (eLeft == False)
      G = E `op` G   (eLeft == True)
*/
static ULong dis_SSEint_E_to_G(
                const VexAbiInfo* vbi,
                Prefix pfx, Long delta,
                const HChar* opname, IROp op,
                Bool   eLeft
             )
{
   HChar   dis_buf[50];
   Int     alen;
   IRTemp  addr;
   UChar   rm = getUChar(delta);
   IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
   IRExpr* epart = NULL;
   if (epartIsReg(rm)) {
      epart = getXMMReg(eregOfRexRM(pfx,rm));
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRexRM(pfx,rm)),
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      delta += 1;
   } else {
      addr  = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      epart = loadLE(Ity_V128, mkexpr(addr));
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      delta += alen;
   }
   putXMMReg( gregOfRexRM(pfx,rm),
              eLeft ? binop(op, epart, gpart)
                    : binop(op, gpart, epart) );
   return delta;
}

/* Helper for doing SSE FP comparisons.  False return ==> unhandled.
   This is all a bit of a kludge in that it ignores the subtleties of
   ordered-vs-unordered and signalling-vs-nonsignalling in the Intel
   spec.  The meaning of the outputs is as follows:

   preZeroP: the active lanes of both incoming arguments should be set to zero
      before performing the operation.  IOW the actual args are to be ignored
      and instead zero bits are to be used.  This is a bit strange but is needed
      to make the constant-false/true variants (FALSE_OQ, TRUE_UQ, FALSE_OS,
      TRUE_US) work.

   preSwapP: the args should be swapped before performing the operation.  Note
      that zeroing arg input sections (per preZeroP) and swapping them (per
      preSwapP) are allowed to happen in either order; the result is the same.

   opP: this returns the actual comparison op to perform.

   postNotP: if true, the result(ing vector) of the comparison operation should
      be bitwise-not-ed.  Note that only the lanes of the output actually
      computed by opP should be not-ed.
*/
static Bool findSSECmpOp ( /*OUT*/Bool* preZeroP,
                           /*OUT*/Bool* preSwapP,
                           /*OUT*/IROp* opP,
                           /*OUT*/Bool* postNotP,
                           UInt imm8, Bool all_lanes, Int sz )
{
   vassert(*preZeroP == False);
   vassert(*preSwapP == False);
   vassert(*opP == Iop_INVALID);
   vassert(*postNotP == False);

   if (imm8 >= 32) return False;

   /* First, compute a (preZero, preSwap, op, postNot) quad from
      the supplied imm8. */
   Bool preZero = False;
   Bool preSwap = False;
   IROp op      = Iop_INVALID;
   Bool postNot = False;

#  define XXX(_preZero, _preSwap, _op, _postNot) \
      { preZero = _preZero; preSwap = _preSwap; op = _op; postNot = _postNot; }
   // If you add a case here, add a corresponding test for both VCMPSD_128
   // and VCMPSS_128 in avx-1.c.
   // Cases 0xA and above are
   //    "Enhanced Comparison Predicate[s] for VEX-Encoded [insns]"
   switch (imm8) {
      // "O" = ordered, "U" = unordered
      // "Q" = non-signalling (quiet), "S" = signalling
      //
      //             replace active arg lanes in operands with zero
      //             |
      //             |      swap operands before applying the cmp op?
      //             |      |
      //             |      |      cmp op          invert active lanes after?
      //             |      |      |               |
      //             v      v      v               v
      case 0x0:  XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_OQ
      case 0x8:  XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_UQ
      case 0x10: XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_OS
      case 0x18: XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_US
      //
      case 0x1:  XXX(False, False, Iop_CmpLT32Fx4, False); break; // LT_OS
      case 0x11: XXX(False, False, Iop_CmpLT32Fx4, False); break; // LT_OQ
      //
      case 0x2:  XXX(False, False, Iop_CmpLE32Fx4, False); break; // LE_OS
      case 0x12: XXX(False, False, Iop_CmpLE32Fx4, False); break; // LE_OQ
      //
      case 0x3:  XXX(False, False, Iop_CmpUN32Fx4, False); break; // UNORD_Q
      case 0x13: XXX(False, False, Iop_CmpUN32Fx4, False); break; // UNORD_S
      //
      // 0xC: this isn't really right because it returns all-1s when
      // either operand is a NaN, and it should return all-0s.
      case 0x4:  XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_UQ
      case 0xC:  XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_OQ
      case 0x14: XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_US
      case 0x1C: XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_OS
      //
      case 0x5:  XXX(False, False, Iop_CmpLT32Fx4, True); break; // NLT_US
      case 0x15: XXX(False, False, Iop_CmpLT32Fx4, True); break; // NLT_UQ
      //
      case 0x6:  XXX(False, False, Iop_CmpLE32Fx4, True); break; // NLE_US
      case 0x16: XXX(False, False, Iop_CmpLE32Fx4, True); break; // NLE_UQ
      //
      case 0x7:  XXX(False, False, Iop_CmpUN32Fx4, True); break; // ORD_Q
      case 0x17: XXX(False, False, Iop_CmpUN32Fx4, True); break; // ORD_S
      //
      case 0x9:  XXX(False, True, Iop_CmpLE32Fx4, True); break; // NGE_US
      case 0x19: XXX(False, True, Iop_CmpLE32Fx4, True); break; // NGE_UQ
      //
      case 0xA:  XXX(False, True, Iop_CmpLT32Fx4, True); break; // NGT_US
      case 0x1A: XXX(False, True, Iop_CmpLT32Fx4, True); break; // NGT_UQ
      //
      case 0xD:  XXX(False, True, Iop_CmpLE32Fx4, False); break; // GE_OS
      case 0x1D: XXX(False, True, Iop_CmpLE32Fx4, False); break; // GE_OQ
      //
      case 0xE:  XXX(False, True, Iop_CmpLT32Fx4, False); break; // GT_OS
      case 0x1E: XXX(False, True, Iop_CmpLT32Fx4, False); break; // GT_OQ
      // Constant-value-result ops
      case 0xB:  XXX(True, False, Iop_CmpEQ32Fx4, True);  break; // FALSE_OQ
      case 0xF:  XXX(True, False, Iop_CmpEQ32Fx4, False); break; // TRUE_UQ
      case 0x1B: XXX(True, False, Iop_CmpEQ32Fx4, True);  break; // FALSE_OS
      case 0x1F: XXX(True, False, Iop_CmpEQ32Fx4, False); break; // TRUE_US
      /* Don't forget to add test cases to VCMPSS_128_<imm8> in
         avx-1.c if new cases turn up. */
      default: break;
   }
#  undef XXX
   if (op == Iop_INVALID) return False;

   /* Now convert the op into one with the same arithmetic but that is
      correct for the width and laneage requirements. */

   /**/ if (sz == 4 && all_lanes) {
      switch (op) {
         case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32Fx4; break;
         case Iop_CmpLT32Fx4: op = Iop_CmpLT32Fx4; break;
         case Iop_CmpLE32Fx4: op = Iop_CmpLE32Fx4; break;
         case Iop_CmpUN32Fx4: op = Iop_CmpUN32Fx4; break;
         default: vassert(0);
      }
   }
   else if (sz == 4 && !all_lanes) {
      switch (op) {
         case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32F0x4; break;
         case Iop_CmpLT32Fx4: op = Iop_CmpLT32F0x4; break;
         case Iop_CmpLE32Fx4: op = Iop_CmpLE32F0x4; break;
         case Iop_CmpUN32Fx4: op = Iop_CmpUN32F0x4; break;
         default: vassert(0);
      }
   }
   else if (sz == 8 && all_lanes) {
      switch (op) {
         case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64Fx2; break;
         case Iop_CmpLT32Fx4: op = Iop_CmpLT64Fx2; break;
         case Iop_CmpLE32Fx4: op = Iop_CmpLE64Fx2; break;
         case Iop_CmpUN32Fx4: op = Iop_CmpUN64Fx2; break;
         default: vassert(0);
      }
   }
   else if (sz == 8 && !all_lanes) {
      switch (op) {
         case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64F0x2; break;
         case Iop_CmpLT32Fx4: op = Iop_CmpLT64F0x2; break;
         case Iop_CmpLE32Fx4: op = Iop_CmpLE64F0x2; break;
         case Iop_CmpUN32Fx4: op = Iop_CmpUN64F0x2; break;
         default: vassert(0);
      }
   }
   else {
      vpanic("findSSECmpOp(amd64,guest)");
   }

   if (preZero) {
      // In this case, preSwap is irrelevant, but assert anyway.
      vassert(preSwap == False);
   }
   *preZeroP = preZero; *preSwapP = preSwap; *opP = op; *postNotP = postNot;
   return True;
}

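
/* Worked example of the two-step translation above, for the predicate
   imm8 = 6 ("NLE_US", e.g. CMPNLEPD): the table yields
   (preZero=False, preSwap=False, op=Iop_CmpLE32Fx4, postNot=True), and the
   width/lane fixup for sz == 8 && all_lanes then rewrites the op to
   Iop_CmpLE64Fx2.  The caller therefore computes a lane-wise <= and inverts
   every lane of the result, which is exactly "not less-or-equal". */
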
/* Handles SSE 32F/64F comparisons.  It can fail, in which case it
   returns the original delta to indicate failure. */

static Long dis_SSE_cmp_E_to_G ( const VexAbiInfo* vbi,
                                 Prefix pfx, Long delta,
                                 const HChar* opname, Bool all_lanes, Int sz )
{
   Long    delta0 = delta;
   HChar   dis_buf[50];
   Int     alen;
   UInt    imm8;
   IRTemp  addr;
   Bool    preZero = False;
   Bool    preSwap = False;
   IROp    op      = Iop_INVALID;
   Bool    postNot = False;
   IRTemp  plain   = newTemp(Ity_V128);
   UChar   rm      = getUChar(delta);
   UShort  mask    = 0;
   vassert(sz == 4 || sz == 8);
   if (epartIsReg(rm)) {
      imm8 = getUChar(delta+1);
      if (imm8 >= 8) return delta0; /* FAIL */
      Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot,
                             imm8, all_lanes, sz);
      if (!ok) return delta0; /* FAIL */
      vassert(!preZero); /* never needed for imm8 < 8 */
      vassert(!preSwap); /* never needed for imm8 < 8 */
      assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)),
                               getXMMReg(eregOfRexRM(pfx,rm))) );
      delta += 2;
      DIP("%s $%u,%s,%s\n", opname,
                            imm8,
                            nameXMMReg(eregOfRexRM(pfx,rm)),
                            nameXMMReg(gregOfRexRM(pfx,rm)) );
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8 = getUChar(delta+alen);
      if (imm8 >= 8) return delta0; /* FAIL */
      Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot,
                             imm8, all_lanes, sz);
      if (!ok) return delta0; /* FAIL */
      vassert(!preZero); /* never needed for imm8 < 8 */
      vassert(!preSwap); /* never needed for imm8 < 8 */
      assign( plain,
              binop(
                 op,
                 getXMMReg(gregOfRexRM(pfx,rm)),
                    all_lanes
                       ? loadLE(Ity_V128, mkexpr(addr))
                    : sz == 8
                       ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
                    : /*sz==4*/
                       unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
              )
      );
      delta += alen+1;
      DIP("%s $%u,%s,%s\n", opname,
                            imm8,
                            dis_buf,
                            nameXMMReg(gregOfRexRM(pfx,rm)) );
   }

   if (postNot && all_lanes) {
      putXMMReg( gregOfRexRM(pfx,rm),
                 unop(Iop_NotV128, mkexpr(plain)) );
   }
   else
   if (postNot && !all_lanes) {
      mask = toUShort(sz==4 ? 0x000F : 0x00FF);
      putXMMReg( gregOfRexRM(pfx,rm),
                 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
   }
   else {
      putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) );
   }

   return delta;
}

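
/* Illustrative sketch only: a plausible call site for the helper above, as
   the decoder might use it for CMPPS (0F C2, all lanes, 32-bit) and CMPSD
   (F2 0F C2, lowest lane, 64-bit).  Treat the opname strings and argument
   choices as assumptions rather than a quote of the dispatcher. */
#if 0
   delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpps", True/*all_lanes*/, 4 );
   delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpsd", False/*all_lanes*/, 8 );
#endif
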
/* Vector by scalar shift of G by the amount specified at the bottom
   of E. */

static ULong dis_SSE_shiftG_byE ( const VexAbiInfo* vbi,
                                  Prefix pfx, Long delta,
                                  const HChar* opname, IROp op )
{
   HChar   dis_buf[50];
   Int     alen, size;
   IRTemp  addr;
   Bool    shl, shr, sar;
   UChar   rm   = getUChar(delta);
   IRTemp  g0   = newTemp(Ity_V128);
   IRTemp  g1   = newTemp(Ity_V128);
   IRTemp  amt  = newTemp(Ity_I64);
   IRTemp  amt8 = newTemp(Ity_I8);
   if (epartIsReg(rm)) {
      assign( amt, getXMMRegLane64(eregOfRexRM(pfx,rm), 0) );
      DIP("%s %s,%s\n", opname,
                        nameXMMReg(eregOfRexRM(pfx,rm)),
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      delta++;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
      DIP("%s %s,%s\n", opname,
                        dis_buf,
                        nameXMMReg(gregOfRexRM(pfx,rm)) );
      delta += alen;
   }
   assign( g0,   getXMMReg(gregOfRexRM(pfx,rm)) );
   assign( amt8, unop(Iop_64to8, mkexpr(amt)) );

   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x8: shl = True; size = 32; break;
      case Iop_ShlN32x4: shl = True; size = 32; break;
      case Iop_ShlN64x2: shl = True; size = 64; break;
      case Iop_SarN16x8: sar = True; size = 16; break;
      case Iop_SarN32x4: sar = True; size = 32; break;
      case Iop_ShrN16x8: shr = True; size = 16; break;
      case Iop_ShrN32x4: shr = True; size = 32; break;
      case Iop_ShrN64x2: shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
      assign(
         g1,
         IRExpr_ITE(
            binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
            binop(op, mkexpr(g0), mkexpr(amt8)),
            mkV128(0x0000)
         )
      );
   } else
   if (sar) {
      assign(
         g1,
         IRExpr_ITE(
            binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
            binop(op, mkexpr(g0), mkexpr(amt8)),
            binop(op, mkexpr(g0), mkU8(size-1))
         )
      );
   } else {
      vassert(0);
   }

   putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) );
   return delta;
}

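
/* Illustrative sketch only: how the helper above would plausibly be invoked
   for PSLLW (66 0F F1) and PSRLQ (66 0F D3).  The IROp pairings are the
   obvious ones and are given here as assumptions, not quoted from the
   dispatcher. */
#if 0
   delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllw", Iop_ShlN16x8 );
   delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlq", Iop_ShrN64x2 );
#endif
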
/* Vector by scalar shift of E by an immediate byte. */

static
ULong dis_SSE_shiftE_imm ( Prefix pfx,
                           Long delta, const HChar* opname, IROp op )
{
   Bool    shl, shr, sar;
   UChar   rm   = getUChar(delta);
   IRTemp  e0   = newTemp(Ity_V128);
   IRTemp  e1   = newTemp(Ity_V128);
   UChar   amt, size;
   vassert(epartIsReg(rm));
   vassert(gregLO3ofRM(rm) == 2
           || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
   amt = getUChar(delta+1);
   delta += 2;
   DIP("%s $%d,%s\n", opname,
                      (Int)amt,
                      nameXMMReg(eregOfRexRM(pfx,rm)) );
   assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );

   shl = shr = sar = False;
   size = 0;
   switch (op) {
      case Iop_ShlN16x8: shl = True; size = 16; break;
      case Iop_ShlN32x4: shl = True; size = 32; break;
      case Iop_ShlN64x2: shl = True; size = 64; break;
      case Iop_SarN16x8: sar = True; size = 16; break;
      case Iop_SarN32x4: sar = True; size = 32; break;
      case Iop_ShrN16x8: shr = True; size = 16; break;
      case Iop_ShrN32x4: shr = True; size = 32; break;
      case Iop_ShrN64x2: shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
      assign( e1, amt >= size
                     ? mkV128(0x0000)
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else
   if (sar) {
      assign( e1, amt >= size
                     ? binop(op, mkexpr(e0), mkU8(size-1))
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else {
      vassert(0);
   }

   putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) );
   return delta;
}

/* Get the current SSE rounding mode. */

static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
{
   return
      unop( Iop_64to32,
            binop( Iop_And64,
                   IRExpr_Get( OFFB_SSEROUND, Ity_I64 ),
                   mkU64(3) ));
}

static void put_sse_roundingmode ( IRExpr* sseround )
{
   vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
   stmt( IRStmt_Put( OFFB_SSEROUND,
                     unop(Iop_32Uto64,sseround) ) );
}

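
/* Why masking with 3 and widening works: the guest SSEROUND field holds the
   rounding mode using the IRRoundingMode encoding (0 = nearest, 1 = -inf,
   2 = +inf, 3 = toward zero), which happens to coincide with the 2-bit RC
   field of MXCSR (00 = nearest, 01 = down, 10 = up, 11 = truncate).  The
   table below only restates that assumed correspondence; it is an
   illustration, not code used by the translator. */
#if 0
   static const IRRoundingMode rc_to_irrm[4]
      = { Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO };
#endif
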
/* Break a V128-bit value up into four 32-bit ints. */

static void breakupV128to32s ( IRTemp t128,
                               /*OUTs*/
                               IRTemp* t3, IRTemp* t2,
                               IRTemp* t1, IRTemp* t0 )
{
   IRTemp hi64 = newTemp(Ity_I64);
   IRTemp lo64 = newTemp(Ity_I64);
   assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
   assign( lo64, unop(Iop_V128to64,   mkexpr(t128)) );

   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);

   *t0 = newTemp(Ity_I32);
   *t1 = newTemp(Ity_I32);
   *t2 = newTemp(Ity_I32);
   *t3 = newTemp(Ity_I32);
   assign( *t0, unop(Iop_64to32,   mkexpr(lo64)) );
   assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
   assign( *t2, unop(Iop_64to32,   mkexpr(hi64)) );
   assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
}

/* Construct a V128-bit value from four 32-bit ints. */

static IRExpr* mkV128from32s ( IRTemp t3, IRTemp t2,
                               IRTemp t1, IRTemp t0 )
{
   return
      binop( Iop_64HLtoV128,
             binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
             binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
      );
}

/* Break a 64-bit value up into four 16-bit ints. */

static void breakup64to16s ( IRTemp t64,
                             /*OUTs*/
                             IRTemp* t3, IRTemp* t2,
                             IRTemp* t1, IRTemp* t0 )
{
   IRTemp hi32 = newTemp(Ity_I32);
   IRTemp lo32 = newTemp(Ity_I32);
   assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
   assign( lo32, unop(Iop_64to32,   mkexpr(t64)) );

   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);

   *t0 = newTemp(Ity_I16);
   *t1 = newTemp(Ity_I16);
   *t2 = newTemp(Ity_I16);
   *t3 = newTemp(Ity_I16);
   assign( *t0, unop(Iop_32to16,   mkexpr(lo32)) );
   assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
   assign( *t2, unop(Iop_32to16,   mkexpr(hi32)) );
   assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
}

/* Construct a 64-bit value from four 16-bit ints. */

static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
                             IRTemp t1, IRTemp t0 )
{
   return
      binop( Iop_32HLto64,
             binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
             binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
      );
}

/* Break a V256-bit value up into four 64-bit ints. */

static void breakupV256to64s ( IRTemp t256,
                               /*OUTs*/
                               IRTemp* t3, IRTemp* t2,
                               IRTemp* t1, IRTemp* t0 )
{
   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   *t0 = newTemp(Ity_I64);
   *t1 = newTemp(Ity_I64);
   *t2 = newTemp(Ity_I64);
   *t3 = newTemp(Ity_I64);
   assign( *t0, unop(Iop_V256to64_0, mkexpr(t256)) );
   assign( *t1, unop(Iop_V256to64_1, mkexpr(t256)) );
   assign( *t2, unop(Iop_V256to64_2, mkexpr(t256)) );
   assign( *t3, unop(Iop_V256to64_3, mkexpr(t256)) );
}

/* Break a V256-bit value up into two V128s. */

static void breakupV256toV128s ( IRTemp t256,
                                 /*OUTs*/
                                 IRTemp* t1, IRTemp* t0 )
{
   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   *t0 = newTemp(Ity_V128);
   *t1 = newTemp(Ity_V128);
   assign(*t1, unop(Iop_V256toV128_1, mkexpr(t256)));
   assign(*t0, unop(Iop_V256toV128_0, mkexpr(t256)));
}

/* Break a V256-bit value up into eight 32-bit ints. */

static void breakupV256to32s ( IRTemp t256,
                               /*OUTs*/
                               IRTemp* t7, IRTemp* t6,
                               IRTemp* t5, IRTemp* t4,
                               IRTemp* t3, IRTemp* t2,
                               IRTemp* t1, IRTemp* t0 )
{
   IRTemp t128_1 = IRTemp_INVALID;
   IRTemp t128_0 = IRTemp_INVALID;
   breakupV256toV128s( t256, &t128_1, &t128_0 );
   breakupV128to32s( t128_1, t7, t6, t5, t4 );
   breakupV128to32s( t128_0, t3, t2, t1, t0 );
}

/* Break a V128-bit value up into two 64-bit ints. */

static void breakupV128to64s ( IRTemp t128,
                               /*OUTs*/
                               IRTemp* t1, IRTemp* t0 )
{
   vassert(t0 && *t0 == IRTemp_INVALID);
   vassert(t1 && *t1 == IRTemp_INVALID);
   *t0 = newTemp(Ity_I64);
   *t1 = newTemp(Ity_I64);
   assign( *t0, unop(Iop_V128to64,   mkexpr(t128)) );
   assign( *t1, unop(Iop_V128HIto64, mkexpr(t128)) );
}

/* Construct a V256-bit value from eight 32-bit ints. */

static IRExpr* mkV256from32s ( IRTemp t7, IRTemp t6,
                               IRTemp t5, IRTemp t4,
                               IRTemp t3, IRTemp t2,
                               IRTemp t1, IRTemp t0 )
{
   return
      binop( Iop_V128HLtoV256,
             binop( Iop_64HLtoV128,
                    binop(Iop_32HLto64, mkexpr(t7), mkexpr(t6)),
                    binop(Iop_32HLto64, mkexpr(t5), mkexpr(t4)) ),
             binop( Iop_64HLtoV128,
                    binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
                    binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) )
      );
}

/* Construct a V256-bit value from four 64-bit ints. */

static IRExpr* mkV256from64s ( IRTemp t3, IRTemp t2,
                               IRTemp t1, IRTemp t0 )
{
   return
      binop( Iop_V128HLtoV256,
             binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)),
             binop(Iop_64HLtoV128, mkexpr(t1), mkexpr(t0))
      );
}

/* Helper for the SSSE3 (not SSE3) PMULHRSW insns.  Given two 64-bit
   values (aa,bb), computes, for each of the 4 16-bit lanes:

   (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
*/
static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
{
   IRTemp aa      = newTemp(Ity_I64);
   IRTemp bb      = newTemp(Ity_I64);
   IRTemp aahi32s = newTemp(Ity_I64);
   IRTemp aalo32s = newTemp(Ity_I64);
   IRTemp bbhi32s = newTemp(Ity_I64);
   IRTemp bblo32s = newTemp(Ity_I64);
   IRTemp rHi     = newTemp(Ity_I64);
   IRTemp rLo     = newTemp(Ity_I64);
   IRTemp one32x2 = newTemp(Ity_I64);
   assign(aa, aax);
   assign(bb, bbx);
   assign( aahi32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
                 mkU8(16) ));
   assign( aalo32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
                 mkU8(16) ));
   assign( bbhi32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
                 mkU8(16) ));
   assign( bblo32s,
           binop(Iop_SarN32x2,
                 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
                 mkU8(16) ));
   assign(one32x2, mkU64( (1ULL << 32) + 1 ));
   assign(
      rHi,
      binop(
         Iop_ShrN32x2,
         binop(
            Iop_Add32x2,
            binop(
               Iop_ShrN32x2,
               binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
               mkU8(14)
            ),
            mkexpr(one32x2)
         ),
         mkU8(1)
      )
   );
   assign(
      rLo,
      binop(
         Iop_ShrN32x2,
         binop(
            Iop_Add32x2,
            binop(
               Iop_ShrN32x2,
               binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
               mkU8(14)
            ),
            mkexpr(one32x2)
         ),
         mkU8(1)
      )
   );
   return
      binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
}

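
/* Worked example for one 16-bit lane, treating the operands as Q15
   fixed-point: aa_lane = 0x4000 (+0.5) and bb_lane = 0x6000 (+0.75).
   The 32-bit signed product is 0x18000000; >>u 14 gives 0x6000; +1 gives
   0x6001; >>u 1 gives 0x3000, i.e. +0.375 in Q15, which is the rounded
   high half that PMULHRSW is defined to produce. */
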
/* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns.  Given two 64-bit
   values (aa,bb), computes, for each lane:

   if aa_lane < 0 then - bb_lane
   else if aa_lane > 0 then bb_lane
   else 0
*/
static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
{
   IRTemp aa       = newTemp(Ity_I64);
   IRTemp bb       = newTemp(Ity_I64);
   IRTemp zero     = newTemp(Ity_I64);
   IRTemp bbNeg    = newTemp(Ity_I64);
   IRTemp negMask  = newTemp(Ity_I64);
   IRTemp posMask  = newTemp(Ity_I64);
   IROp   opSub    = Iop_INVALID;
   IROp   opCmpGTS = Iop_INVALID;

   switch (laneszB) {
      case 1: opSub = Iop_Sub8x8;  opCmpGTS = Iop_CmpGT8Sx8;  break;
      case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
      case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
      default: vassert(0);
   }

   assign( aa,      aax );
   assign( bb,      bbx );
   assign( zero,    mkU64(0) );
   assign( bbNeg,   binop(opSub,    mkexpr(zero), mkexpr(bb)) );
   assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
   assign( posMask, binop(opCmpGTS, mkexpr(aa),   mkexpr(zero)) );

   return
      binop(Iop_Or64,
            binop(Iop_And64, mkexpr(bb),    mkexpr(posMask)),
            binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
}

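
/* Example for a single byte lane (PSIGNB): with aa = 0xFF (-1) and
   bb = 0x05, negMask is all-ones and posMask is zero, so the result is
   (0 - 0x05) & ~0 = 0xFB (-5); with aa = 0 both masks are zero and the
   lane result is 0, exactly as the specification above requires. */
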
/* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns.  Given a 64-bit
   value aa, computes, for each lane

   if aa < 0 then -aa else aa

   Note that the result is interpreted as unsigned, so that the
   absolute value of the most negative signed input can be
   represented.
*/
static IRTemp math_PABS_MMX ( IRTemp aa, Int laneszB )
{
   IRTemp res     = newTemp(Ity_I64);
   IRTemp zero    = newTemp(Ity_I64);
   IRTemp aaNeg   = newTemp(Ity_I64);
   IRTemp negMask = newTemp(Ity_I64);
   IRTemp posMask = newTemp(Ity_I64);
   IROp   opSub   = Iop_INVALID;
   IROp   opSarN  = Iop_INVALID;

   switch (laneszB) {
      case 1: opSub = Iop_Sub8x8;  opSarN = Iop_SarN8x8;  break;
      case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
      case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
      default: vassert(0);
   }

   assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
   assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
   assign( zero,    mkU64(0) );
   assign( aaNeg,   binop(opSub, mkexpr(zero), mkexpr(aa)) );
   assign( res,
           binop(Iop_Or64,
                 binop(Iop_And64, mkexpr(aa),    mkexpr(posMask)),
                 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ));
   return res;
}

/* XMM version of math_PABS_MMX. */
static IRTemp math_PABS_XMM ( IRTemp aa, Int laneszB )
{
   IRTemp res  = newTemp(Ity_V128);
   IRTemp aaHi = newTemp(Ity_I64);
   IRTemp aaLo = newTemp(Ity_I64);
   assign(aaHi, unop(Iop_V128HIto64, mkexpr(aa)));
   assign(aaLo, unop(Iop_V128to64, mkexpr(aa)));
   assign(res, binop(Iop_64HLtoV128,
                     mkexpr(math_PABS_MMX(aaHi, laneszB)),
                     mkexpr(math_PABS_MMX(aaLo, laneszB))));
   return res;
}

/* Specialisations of math_PABS_XMM, since there's no easy way to do
   partial applications in C :-( */
static IRTemp math_PABS_XMM_pap4 ( IRTemp aa ) {
   return math_PABS_XMM(aa, 4);
}

static IRTemp math_PABS_XMM_pap2 ( IRTemp aa ) {
   return math_PABS_XMM(aa, 2);
}

static IRTemp math_PABS_XMM_pap1 ( IRTemp aa ) {
   return math_PABS_XMM(aa, 1);
}

/* YMM version of math_PABS_XMM. */
static IRTemp math_PABS_YMM ( IRTemp aa, Int laneszB )
{
   IRTemp res  = newTemp(Ity_V256);
   IRTemp aaHi = IRTemp_INVALID;
   IRTemp aaLo = IRTemp_INVALID;
   breakupV256toV128s(aa, &aaHi, &aaLo);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PABS_XMM(aaHi, laneszB)),
                     mkexpr(math_PABS_XMM(aaLo, laneszB))));
   return res;
}

static IRTemp math_PABS_YMM_pap4 ( IRTemp aa ) {
   return math_PABS_YMM(aa, 4);
}

static IRTemp math_PABS_YMM_pap2 ( IRTemp aa ) {
   return math_PABS_YMM(aa, 2);
}

static IRTemp math_PABS_YMM_pap1 ( IRTemp aa ) {
   return math_PABS_YMM(aa, 1);
}

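
/* Example of the "interpreted as unsigned" point above: for PABSB a lane
   holding 0x80 (-128) yields 0x80, which reads as +128 unsigned; there is
   no signed 8-bit representation of +128, so no special overflow handling
   is needed. */
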
static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
                                        IRTemp lo64, Long byteShift )
{
   vassert(byteShift >= 1 && byteShift <= 7);
   return
      binop(Iop_Or64,
            binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
            binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
      );
}

static IRTemp math_PALIGNR_XMM ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp res = newTemp(Ity_V128);
   IRTemp sHi = newTemp(Ity_I64);
   IRTemp sLo = newTemp(Ity_I64);
   IRTemp dHi = newTemp(Ity_I64);
   IRTemp dLo = newTemp(Ity_I64);
   IRTemp rHi = newTemp(Ity_I64);
   IRTemp rLo = newTemp(Ity_I64);

   assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

   if (imm8 == 0) {
      assign( rHi, mkexpr(sHi) );
      assign( rLo, mkexpr(sLo) );
   }
   else if (imm8 >= 1 && imm8 <= 7) {
      assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, imm8) );
      assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, imm8) );
   }
   else if (imm8 == 8) {
      assign( rHi, mkexpr(dLo) );
      assign( rLo, mkexpr(sHi) );
   }
   else if (imm8 >= 9 && imm8 <= 15) {
      assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-8) );
      assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, imm8-8) );
   }
   else if (imm8 == 16) {
      assign( rHi, mkexpr(dHi) );
      assign( rLo, mkexpr(dLo) );
   }
   else if (imm8 >= 17 && imm8 <= 23) {
      assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-16))) );
      assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-16) );
   }
   else if (imm8 == 24) {
      assign( rHi, mkU64(0) );
      assign( rLo, mkexpr(dHi) );
   }
   else if (imm8 >= 25 && imm8 <= 31) {
      assign( rHi, mkU64(0) );
      assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-24))) );
   }
   else if (imm8 >= 32 && imm8 <= 255) {
      assign( rHi, mkU64(0) );
      assign( rLo, mkU64(0) );
   }
   else
      vassert(0);

   assign( res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
   return res;
}

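
/* Worked example: for imm8 = 4 the result is bytes 4..19 of the 32-byte
   concatenation dV:sV (dV in the upper half).  In register terms that is
   sV bytes 4..15 followed by dV bytes 0..3 in the top four positions,
   which is what the (imm8 >= 1 && imm8 <= 7) arm above builds out of the
   two 64-bit Or/Shl/Shr combinations. */
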
/* Generate a SIGSEGV followed by a restart of the current instruction
   if effective_addr is not 16-aligned.  This is required behaviour
   for some SSE3 instructions and all 128-bit SSSE3 instructions.
   This assumes that guest_RIP_curr_instr is set correctly!
   On FreeBSD, this kind of error generates a SIGBUS. */
static
void gen_SIGNAL_if_not_XX_aligned ( const VexAbiInfo* vbi,
                                    IRTemp effective_addr, ULong mask )
{
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE64,
               binop(Iop_And64,mkexpr(effective_addr),mkU64(mask)),
               mkU64(0)),
         vbi->guest_amd64_sigbus_on_misalign ? Ijk_SigBUS : Ijk_SigSEGV,
         IRConst_U64(guest_RIP_curr_instr),
         OFFB_RIP
      )
   );
}

static void gen_SIGNAL_if_not_16_aligned ( const VexAbiInfo* vbi,
                                            IRTemp effective_addr ) {
   gen_SIGNAL_if_not_XX_aligned(vbi, effective_addr, 16-1);
}

static void gen_SIGNAL_if_not_32_aligned ( const VexAbiInfo* vbi,
                                            IRTemp effective_addr ) {
   gen_SIGNAL_if_not_XX_aligned(vbi, effective_addr, 32-1);
}

static void gen_SIGNAL_if_not_64_aligned ( const VexAbiInfo* vbi,
                                            IRTemp effective_addr ) {
   gen_SIGNAL_if_not_XX_aligned(vbi, effective_addr, 64-1);
}

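
/* Illustrative sketch only: a typical use of the 16-byte variant, guarding
   an aligned 128-bit access such as MOVAPS from memory.  The surrounding
   decode logic is elided; treat this as an assumed call pattern rather than
   a quote of any particular decoder case. */
#if 0
   addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
   gen_SIGNAL_if_not_16_aligned( vbi, addr );
   putXMMReg( gregOfRexRM(pfx,modrm), loadLE(Ity_V128, mkexpr(addr)) );
#endif
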
/* Helper for deciding whether a given insn (starting at the opcode
   byte) may validly be used with a LOCK prefix.  The following insns
   may be used with LOCK when their destination operand is in memory.
   AFAICS this is exactly the same for both 32-bit and 64-bit mode.

   ADD        80 /0,  81 /0,  82 /0,  83 /0,  00,  01
   OR         80 /1,  81 /1,  82 /x,  83 /1,  08,  09
   ADC        80 /2,  81 /2,  82 /2,  83 /2,  10,  11
   SBB        80 /3,  81 /3,  82 /x,  83 /3,  18,  19
   AND        80 /4,  81 /4,  82 /x,  83 /4,  20,  21
   SUB        80 /5,  81 /5,  82 /x,  83 /5,  28,  29
   XOR        80 /6,  81 /6,  82 /x,  83 /6,  30,  31

   DEC        FE /1,  FF /1
   INC        FE /0,  FF /0

   NEG        F6 /3,  F7 /3
   NOT        F6 /2,  F7 /2

   XCHG       86, 87

   BTC        0F BB,  0F BA /7
   BTR        0F B3,  0F BA /6
   BTS        0F AB,  0F BA /5

   CMPXCHG    0F B0,  0F B1
   CMPXCHG8B  0F C7 /1

   XADD       0F C0,  0F C1

   ------------------------------

   80 /0  =  addb $imm8,  rm8
   81 /0  =  addl $imm32, rm32  and  addw $imm16, rm16
   82 /0  =  addb $imm8,  rm8
   83 /0  =  addl $simm8, rm32  and  addw $simm8, rm16

   00     =  addb r8,  rm8
   01     =  addl r32, rm32  and  addw r16, rm16

   Same for ADD OR ADC SBB AND SUB XOR

   FE /1  = dec rm8
   FF /1  = dec rm32  and  dec rm16

   FE /0  = inc rm8
   FF /0  = inc rm32  and  inc rm16

   F6 /3  = neg rm8
   F7 /3  = neg rm32  and  neg rm16

   F6 /2  = not rm8
   F7 /2  = not rm32  and  not rm16

   0F BB     = btcw r16, rm16    and  btcl r32, rm32
   0F BA /7  = btcw $imm8, rm16  and  btcw $imm8, rm32

   Same for BTS, BTR
*/
static Bool can_be_used_with_LOCK_prefix ( const UChar* opc )
{
   switch (opc[0]) {
      case 0x00: case 0x01: case 0x08: case 0x09:
      case 0x10: case 0x11: case 0x18: case 0x19:
      case 0x20: case 0x21: case 0x28: case 0x29:
      case 0x30: case 0x31:
         if (!epartIsReg(opc[1]))
            return True;
         break;

      case 0x80: case 0x81: case 0x82: case 0x83:
         if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
             && !epartIsReg(opc[1]))
            return True;
         break;

      case 0xFE: case 0xFF:
         if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
             && !epartIsReg(opc[1]))
            return True;
         break;

      case 0xF6: case 0xF7:
         if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
             && !epartIsReg(opc[1]))
            return True;
         break;

      case 0x86: case 0x87:
         if (!epartIsReg(opc[1]))
            return True;
         break;

      case 0x0F: {
         switch (opc[1]) {
            case 0xBB: case 0xB3: case 0xAB:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            case 0xBA:
               if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
                   && !epartIsReg(opc[2]))
                  return True;
               break;
            case 0xB0: case 0xB1:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            case 0xC7:
               if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
                  return True;
               break;
            case 0xC0: case 0xC1:
               if (!epartIsReg(opc[2]))
                  return True;
               break;
            default:
               break;
         } /* switch (opc[1]) */
         break;
      }

      default:
         break;
   } /* switch (opc[0]) */

   return False;
}

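
/* Worked example: for the byte sequence F0 01 18 ("lock addl %ebx,(%rax)")
   the caller strips the F0 prefix and passes opc pointing at 01 18.  opc[0]
   is 0x01 (one of the ADD forms listed above) and opc[1] = 0x18 has mod=00,
   so epartIsReg() is False and the function returns True; the same opcode
   with a register destination (e.g. modrm 0xD8) falls through and returns
   False. */
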
/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level SSE/SSE2: dis_ESC_0F__SSE2                 ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

static Long dis_COMISD ( const VexAbiInfo* vbi, Prefix pfx,
                         Long delta, Bool isAvx, UChar opc )
{
   vassert(opc == 0x2F/*COMISD*/ || opc == 0x2E/*UCOMISD*/);
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp argL  = newTemp(Ity_F64);
   IRTemp argR  = newTemp(Ity_F64);
   UChar  modrm = getUChar(delta);
   IRTemp addr  = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm),
                                      0/*lowest lane*/ ) );
      delta += 1;
      DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
                                opc==0x2E ? "u" : "",
                                nameXMMReg(eregOfRexRM(pfx,modrm)),
                                nameXMMReg(gregOfRexRM(pfx,modrm)) );
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
      delta += alen;
      DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
                                opc==0x2E ? "u" : "",
                                dis_buf,
                                nameXMMReg(gregOfRexRM(pfx,modrm)) );
   }
   assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm),
                                   0/*lowest lane*/ ) );

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop( Iop_And64,
                   unop( Iop_32Uto64,
                         binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ),
                   mkU64(0x45)
        )));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
   return delta;
}

static Long dis_COMISS ( const VexAbiInfo* vbi, Prefix pfx,
                         Long delta, Bool isAvx, UChar opc )
{
   vassert(opc == 0x2F/*COMISS*/ || opc == 0x2E/*UCOMISS*/);
   Int    alen  = 0;
   HChar  dis_buf[50];
   IRTemp argL  = newTemp(Ity_F32);
   IRTemp argR  = newTemp(Ity_F32);
   UChar  modrm = getUChar(delta);
   IRTemp addr  = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm),
                                      0/*lowest lane*/ ) );
      delta += 1;
      DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
                                opc==0x2E ? "u" : "",
                                nameXMMReg(eregOfRexRM(pfx,modrm)),
                                nameXMMReg(gregOfRexRM(pfx,modrm)) );
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
      delta += alen;
      DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
                                opc==0x2E ? "u" : "",
                                dis_buf,
                                nameXMMReg(gregOfRexRM(pfx,modrm)) );
   }
   assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm),
                                   0/*lowest lane*/ ) );

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop( Iop_And64,
                   unop( Iop_32Uto64,
                         binop(Iop_CmpF64,
                               unop(Iop_F32toF64,mkexpr(argL)),
                               unop(Iop_F32toF64,mkexpr(argR)))),
                   mkU64(0x45)
        )));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
   return delta;
}

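
/* Note on the 0x45 mask used above: Iop_CmpF64 produces an IRCmpF64Result
   (UN = 0x45, LT = 0x01, GT = 0x00, EQ = 0x40), and AND-ing it with 0x45
   keeps exactly the eflags bits that COMISD/COMISS must set: CF (bit 0),
   PF (bit 2) and ZF (bit 6).  So unordered gives ZF=PF=CF=1, less-than
   gives CF=1, greater-than gives all zero and equal gives ZF=1, matching
   the Intel definition of (U)COMISx. */
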
static Long dis_PSHUFD_32x4 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool writesYmm )
{
   Int order;
   Int alen  = 0;
   HChar dis_buf[50];
   IRTemp sV = newTemp(Ity_V128);
   UChar modrm = getUChar(delta);
   const HChar* strV = writesYmm ? "v" : "";
   IRTemp addr = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
      order = (Int)getUChar(delta+1);
      delta += 1+1;
      DIP("%spshufd $%d,%s,%s\n", strV, order,
                                  nameXMMReg(eregOfRexRM(pfx,modrm)),
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
                        1/*byte after the amode*/ );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      order = (Int)getUChar(delta+alen);
      delta += alen+1;
      DIP("%spshufd $%d,%s,%s\n", strV, order,
                                  dis_buf,
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
   }

   IRTemp s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );

#  define SEL(n)  ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
   IRTemp dV = newTemp(Ity_V128);
   assign(dV,
          mkV128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
                         SEL((order>>2)&3), SEL((order>>0)&3) )
   );
#  undef SEL

   (writesYmm ? putYMMRegLoAndZU : putXMMReg)
      (gregOfRexRM(pfx,modrm), mkexpr(dV));
   return delta;
}

static Long dis_PSHUFD_32x8 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
{
   Int order;
   Int alen  = 0;
   HChar dis_buf[50];
   IRTemp sV = newTemp(Ity_V256);
   UChar modrm = getUChar(delta);
   IRTemp addr = IRTemp_INVALID;
   UInt rG = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getYMMReg(rE) );
      order = (Int)getUChar(delta+1);
      delta += 1+1;
      DIP("vpshufd $%d,%s,%s\n", order, nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
                        1/*byte after the amode*/ );
      assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
      order = (Int)getUChar(delta+alen);
      delta += alen+1;
      DIP("vpshufd $%d,%s,%s\n", order,  dis_buf, nameYMMReg(rG));
   }

   IRTemp s[8];
   s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
   breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
                         &s[3], &s[2], &s[1], &s[0] );

   putYMMReg( rG, mkV256from32s( s[4 + ((order>>6)&3)],
                                 s[4 + ((order>>4)&3)],
                                 s[4 + ((order>>2)&3)],
                                 s[4 + ((order>>0)&3)],
                                 s[0 + ((order>>6)&3)],
                                 s[0 + ((order>>4)&3)],
                                 s[0 + ((order>>2)&3)],
                                 s[0 + ((order>>0)&3)] ) );
   return delta;
}

static IRTemp math_PSRLDQ ( IRTemp sV, Int imm )
{
   IRTemp dV    = newTemp(Ity_V128);
   IRTemp hi64  = newTemp(Ity_I64);
   IRTemp lo64  = newTemp(Ity_I64);
   IRTemp hi64r = newTemp(Ity_I64);
   IRTemp lo64r = newTemp(Ity_I64);

   vassert(imm >= 0 && imm <= 255);
   if (imm >= 16) {
      assign(dV, mkV128(0x0000));
      return dV;
   }

   assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );

   if (imm == 0) {
      assign( lo64r, mkexpr(lo64) );
      assign( hi64r, mkexpr(hi64) );
   }
   else
   if (imm == 8) {
      assign( hi64r, mkU64(0) );
      assign( lo64r, mkexpr(hi64) );
   }
   else
   if (imm > 8) {
      assign( hi64r, mkU64(0) );
      assign( lo64r, binop( Iop_Shr64, mkexpr(hi64), mkU8( 8*(imm-8) ) ));
   } else {
      assign( hi64r, binop( Iop_Shr64, mkexpr(hi64), mkU8(8 * imm) ));
      assign( lo64r,
              binop( Iop_Or64,
                     binop(Iop_Shr64, mkexpr(lo64),
                           mkU8(8 * imm)),
                     binop(Iop_Shl64, mkexpr(hi64),
                           mkU8(8 * (8 - imm)) )
                     )
              );
   }

   assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
   return dV;
}

static IRTemp math_PSLLDQ ( IRTemp sV, Int imm )
{
   IRTemp dV    = newTemp(Ity_V128);
   IRTemp hi64  = newTemp(Ity_I64);
   IRTemp lo64  = newTemp(Ity_I64);
   IRTemp hi64r = newTemp(Ity_I64);
   IRTemp lo64r = newTemp(Ity_I64);

   vassert(imm >= 0 && imm <= 255);
   if (imm >= 16) {
      assign(dV, mkV128(0x0000));
      return dV;
   }

   assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );

   if (imm == 0) {
      assign( lo64r, mkexpr(lo64) );
      assign( hi64r, mkexpr(hi64) );
   }
   else
   if (imm == 8) {
      assign( lo64r, mkU64(0) );
      assign( hi64r, mkexpr(lo64) );
   }
   else
   if (imm > 8) {
      assign( lo64r, mkU64(0) );
      assign( hi64r, binop( Iop_Shl64, mkexpr(lo64), mkU8( 8*(imm-8) ) ));
   } else {
      assign( lo64r, binop( Iop_Shl64, mkexpr(lo64), mkU8(8 * imm) ));
      assign( hi64r,
              binop( Iop_Or64,
                     binop(Iop_Shl64, mkexpr(hi64),
                           mkU8(8 * imm)),
                     binop(Iop_Shr64, mkexpr(lo64),
                           mkU8(8 * (8 - imm)) )
                     )
              );
   }

   assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
   return dV;
}

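
/* Worked example for math_PSRLDQ with imm = 3: the result's low 64 bits are
   (lo64 >>u 24) | (hi64 << 40), i.e. source bytes 3..10, and the high 64
   bits are hi64 >>u 24, i.e. bytes 11..15 zero-extended -- a whole-vector
   right shift by three bytes.  math_PSLLDQ mirrors this in the other
   direction. */
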
10580 static Long
dis_CVTxSD2SI ( const VexAbiInfo
* vbi
, Prefix pfx
,
10581 Long delta
, Bool isAvx
, UChar opc
, Int sz
)
10583 vassert(opc
== 0x2D/*CVTSD2SI*/ || opc
== 0x2C/*CVTTSD2SI*/);
10586 UChar modrm
= getUChar(delta
);
10587 IRTemp addr
= IRTemp_INVALID
;
10588 IRTemp rmode
= newTemp(Ity_I32
);
10589 IRTemp f64lo
= newTemp(Ity_F64
);
10590 Bool r2zero
= toBool(opc
== 0x2C);
10592 if (epartIsReg(modrm
)) {
10594 assign(f64lo
, getXMMRegLane64F(eregOfRexRM(pfx
,modrm
), 0));
10595 DIP("%scvt%ssd2si %s,%s\n", isAvx
? "v" : "", r2zero
? "t" : "",
10596 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
10597 nameIReg(sz
, gregOfRexRM(pfx
,modrm
),
10600 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10601 assign(f64lo
, loadLE(Ity_F64
, mkexpr(addr
)));
10603 DIP("%scvt%ssd2si %s,%s\n", isAvx
? "v" : "", r2zero
? "t" : "",
10605 nameIReg(sz
, gregOfRexRM(pfx
,modrm
),
10610 assign( rmode
, mkU32((UInt
)Irrm_ZERO
) );
10612 assign( rmode
, get_sse_roundingmode() );
10616 putIReg32( gregOfRexRM(pfx
,modrm
),
10617 binop( Iop_F64toI32S
, mkexpr(rmode
), mkexpr(f64lo
)) );
10620 putIReg64( gregOfRexRM(pfx
,modrm
),
10621 binop( Iop_F64toI64S
, mkexpr(rmode
), mkexpr(f64lo
)) );
10628 static Long
dis_CVTxSS2SI ( const VexAbiInfo
* vbi
, Prefix pfx
,
10629 Long delta
, Bool isAvx
, UChar opc
, Int sz
)
10631 vassert(opc
== 0x2D/*CVTSS2SI*/ || opc
== 0x2C/*CVTTSS2SI*/);
10634 UChar modrm
= getUChar(delta
);
10635 IRTemp addr
= IRTemp_INVALID
;
10636 IRTemp rmode
= newTemp(Ity_I32
);
10637 IRTemp f32lo
= newTemp(Ity_F32
);
10638 Bool r2zero
= toBool(opc
== 0x2C);
10640 if (epartIsReg(modrm
)) {
10642 assign(f32lo
, getXMMRegLane32F(eregOfRexRM(pfx
,modrm
), 0));
10643 DIP("%scvt%sss2si %s,%s\n", isAvx
? "v" : "", r2zero
? "t" : "",
10644 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
10645 nameIReg(sz
, gregOfRexRM(pfx
,modrm
),
10648 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10649 assign(f32lo
, loadLE(Ity_F32
, mkexpr(addr
)));
10651 DIP("%scvt%sss2si %s,%s\n", isAvx
? "v" : "", r2zero
? "t" : "",
10653 nameIReg(sz
, gregOfRexRM(pfx
,modrm
),
10658 assign( rmode
, mkU32((UInt
)Irrm_ZERO
) );
10660 assign( rmode
, get_sse_roundingmode() );
10664 putIReg32( gregOfRexRM(pfx
,modrm
),
10665 binop( Iop_F64toI32S
,
10667 unop(Iop_F32toF64
, mkexpr(f32lo
))) );
10670 putIReg64( gregOfRexRM(pfx
,modrm
),
10671 binop( Iop_F64toI64S
,
10673 unop(Iop_F32toF64
, mkexpr(f32lo
))) );
10680 static Long
dis_CVTPS2PD_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10681 Long delta
, Bool isAvx
)
10683 IRTemp addr
= IRTemp_INVALID
;
10686 IRTemp f32lo
= newTemp(Ity_F32
);
10687 IRTemp f32hi
= newTemp(Ity_F32
);
10688 UChar modrm
= getUChar(delta
);
10689 UInt rG
= gregOfRexRM(pfx
,modrm
);
10690 if (epartIsReg(modrm
)) {
10691 UInt rE
= eregOfRexRM(pfx
,modrm
);
10692 assign( f32lo
, getXMMRegLane32F(rE
, 0) );
10693 assign( f32hi
, getXMMRegLane32F(rE
, 1) );
10695 DIP("%scvtps2pd %s,%s\n",
10696 isAvx
? "v" : "", nameXMMReg(rE
), nameXMMReg(rG
));
10698 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10699 assign( f32lo
, loadLE(Ity_F32
, mkexpr(addr
)) );
10700 assign( f32hi
, loadLE(Ity_F32
,
10701 binop(Iop_Add64
,mkexpr(addr
),mkU64(4))) );
10703 DIP("%scvtps2pd %s,%s\n",
10704 isAvx
? "v" : "", dis_buf
, nameXMMReg(rG
));
10707 putXMMRegLane64F( rG
, 1, unop(Iop_F32toF64
, mkexpr(f32hi
)) );
10708 putXMMRegLane64F( rG
, 0, unop(Iop_F32toF64
, mkexpr(f32lo
)) );
10710 putYMMRegLane128( rG
, 1, mkV128(0));
10715 static Long
dis_CVTPS2PD_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10718 IRTemp addr
= IRTemp_INVALID
;
10721 IRTemp f32_0
= newTemp(Ity_F32
);
10722 IRTemp f32_1
= newTemp(Ity_F32
);
10723 IRTemp f32_2
= newTemp(Ity_F32
);
10724 IRTemp f32_3
= newTemp(Ity_F32
);
10725 UChar modrm
= getUChar(delta
);
10726 UInt rG
= gregOfRexRM(pfx
,modrm
);
10727 if (epartIsReg(modrm
)) {
10728 UInt rE
= eregOfRexRM(pfx
,modrm
);
10729 assign( f32_0
, getXMMRegLane32F(rE
, 0) );
10730 assign( f32_1
, getXMMRegLane32F(rE
, 1) );
10731 assign( f32_2
, getXMMRegLane32F(rE
, 2) );
10732 assign( f32_3
, getXMMRegLane32F(rE
, 3) );
10734 DIP("vcvtps2pd %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
10736 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10737 assign( f32_0
, loadLE(Ity_F32
, mkexpr(addr
)) );
10738 assign( f32_1
, loadLE(Ity_F32
,
10739 binop(Iop_Add64
,mkexpr(addr
),mkU64(4))) );
10740 assign( f32_2
, loadLE(Ity_F32
,
10741 binop(Iop_Add64
,mkexpr(addr
),mkU64(8))) );
10742 assign( f32_3
, loadLE(Ity_F32
,
10743 binop(Iop_Add64
,mkexpr(addr
),mkU64(12))) );
10745 DIP("vcvtps2pd %s,%s\n", dis_buf
, nameYMMReg(rG
));
10748 putYMMRegLane64F( rG
, 3, unop(Iop_F32toF64
, mkexpr(f32_3
)) );
10749 putYMMRegLane64F( rG
, 2, unop(Iop_F32toF64
, mkexpr(f32_2
)) );
10750 putYMMRegLane64F( rG
, 1, unop(Iop_F32toF64
, mkexpr(f32_1
)) );
10751 putYMMRegLane64F( rG
, 0, unop(Iop_F32toF64
, mkexpr(f32_0
)) );
10756 static Long
dis_CVTPD2PS_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10757 Long delta
, Bool isAvx
)
10759 IRTemp addr
= IRTemp_INVALID
;
10762 UChar modrm
= getUChar(delta
);
10763 UInt rG
= gregOfRexRM(pfx
,modrm
);
10764 IRTemp argV
= newTemp(Ity_V128
);
10765 IRTemp rmode
= newTemp(Ity_I32
);
10766 if (epartIsReg(modrm
)) {
10767 UInt rE
= eregOfRexRM(pfx
,modrm
);
10768 assign( argV
, getXMMReg(rE
) );
10770 DIP("%scvtpd2ps %s,%s\n", isAvx
? "v" : "",
10771 nameXMMReg(rE
), nameXMMReg(rG
));
10773 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10774 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10776 DIP("%scvtpd2ps %s,%s\n", isAvx
? "v" : "",
10777 dis_buf
, nameXMMReg(rG
) );
10780 assign( rmode
, get_sse_roundingmode() );
10781 IRTemp t0
= newTemp(Ity_F64
);
10782 IRTemp t1
= newTemp(Ity_F64
);
10783 assign( t0
, unop(Iop_ReinterpI64asF64
,
10784 unop(Iop_V128to64
, mkexpr(argV
))) );
10785 assign( t1
, unop(Iop_ReinterpI64asF64
,
10786 unop(Iop_V128HIto64
, mkexpr(argV
))) );
10788 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), mkexpr(_t) )
10789 putXMMRegLane32( rG
, 3, mkU32(0) );
10790 putXMMRegLane32( rG
, 2, mkU32(0) );
10791 putXMMRegLane32F( rG
, 1, CVT(t1
) );
10792 putXMMRegLane32F( rG
, 0, CVT(t0
) );
10795 putYMMRegLane128( rG
, 1, mkV128(0) );
10801 static Long
dis_CVTxPS2DQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10802 Long delta
, Bool isAvx
, Bool r2zero
)
10804 IRTemp addr
= IRTemp_INVALID
;
10807 UChar modrm
= getUChar(delta
);
10808 IRTemp argV
= newTemp(Ity_V128
);
10809 IRTemp rmode
= newTemp(Ity_I32
);
10810 UInt rG
= gregOfRexRM(pfx
,modrm
);
10812 if (epartIsReg(modrm
)) {
10813 UInt rE
= eregOfRexRM(pfx
,modrm
);
10814 assign( argV
, getXMMReg(rE
) );
10816 DIP("%scvt%sps2dq %s,%s\n",
10817 isAvx
? "v" : "", r2zero
? "t" : "", nameXMMReg(rE
), nameXMMReg(rG
));
10819 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10820 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10822 DIP("%scvt%sps2dq %s,%s\n",
10823 isAvx
? "v" : "", r2zero
? "t" : "", dis_buf
, nameXMMReg(rG
) );
10826 assign( rmode
, r2zero
? mkU32((UInt
)Irrm_ZERO
)
10827 : get_sse_roundingmode() );
10828 putXMMReg( rG
, binop(Iop_F32toI32Sx4
, mkexpr(rmode
), mkexpr(argV
)) );
10830 putYMMRegLane128( rG
, 1, mkV128(0) );
10836 static Long
dis_CVTxPS2DQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10837 Long delta
, Bool r2zero
)
10839 IRTemp addr
= IRTemp_INVALID
;
10842 UChar modrm
= getUChar(delta
);
10843 IRTemp argV
= newTemp(Ity_V256
);
10844 IRTemp rmode
= newTemp(Ity_I32
);
10845 UInt rG
= gregOfRexRM(pfx
,modrm
);
10847 if (epartIsReg(modrm
)) {
10848 UInt rE
= eregOfRexRM(pfx
,modrm
);
10849 assign( argV
, getYMMReg(rE
) );
10851 DIP("vcvt%sps2dq %s,%s\n",
10852 r2zero
? "t" : "", nameYMMReg(rE
), nameYMMReg(rG
));
10854 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10855 assign( argV
, loadLE(Ity_V256
, mkexpr(addr
)) );
10857 DIP("vcvt%sps2dq %s,%s\n",
10858 r2zero
? "t" : "", dis_buf
, nameYMMReg(rG
) );
10861 assign( rmode
, r2zero
? mkU32((UInt
)Irrm_ZERO
)
10862 : get_sse_roundingmode() );
10863 putYMMReg( rG
, binop(Iop_F32toI32Sx8
, mkexpr(rmode
), mkexpr(argV
)) );
10868 static Long
dis_CVTxPD2DQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10869 Long delta
, Bool isAvx
, Bool r2zero
)
10871 IRTemp addr
= IRTemp_INVALID
;
10874 UChar modrm
= getUChar(delta
);
10875 IRTemp argV
= newTemp(Ity_V128
);
10876 IRTemp rmode
= newTemp(Ity_I32
);
10877 UInt rG
= gregOfRexRM(pfx
,modrm
);
10880 if (epartIsReg(modrm
)) {
10881 UInt rE
= eregOfRexRM(pfx
,modrm
);
10882 assign( argV
, getXMMReg(rE
) );
10884 DIP("%scvt%spd2dq %s,%s\n",
10885 isAvx
? "v" : "", r2zero
? "t" : "", nameXMMReg(rE
), nameXMMReg(rG
));
10887 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10888 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10890 DIP("%scvt%spd2dqx %s,%s\n",
10891 isAvx
? "v" : "", r2zero
? "t" : "", dis_buf
, nameXMMReg(rG
) );
10895 assign(rmode
, mkU32((UInt
)Irrm_ZERO
) );
10897 assign( rmode
, get_sse_roundingmode() );
10900 t0
= newTemp(Ity_F64
);
10901 t1
= newTemp(Ity_F64
);
10902 assign( t0
, unop(Iop_ReinterpI64asF64
,
10903 unop(Iop_V128to64
, mkexpr(argV
))) );
10904 assign( t1
, unop(Iop_ReinterpI64asF64
,
10905 unop(Iop_V128HIto64
, mkexpr(argV
))) );
10907 # define CVT(_t) binop( Iop_F64toI32S, \
10911 putXMMRegLane32( rG
, 3, mkU32(0) );
10912 putXMMRegLane32( rG
, 2, mkU32(0) );
10913 putXMMRegLane32( rG
, 1, CVT(t1
) );
10914 putXMMRegLane32( rG
, 0, CVT(t0
) );
10917 putYMMRegLane128( rG
, 1, mkV128(0) );
10923 static Long
dis_CVTxPD2DQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10924 Long delta
, Bool r2zero
)
10926 IRTemp addr
= IRTemp_INVALID
;
10929 UChar modrm
= getUChar(delta
);
10930 IRTemp argV
= newTemp(Ity_V256
);
10931 IRTemp rmode
= newTemp(Ity_I32
);
10932 UInt rG
= gregOfRexRM(pfx
,modrm
);
10933 IRTemp t0
, t1
, t2
, t3
;
10935 if (epartIsReg(modrm
)) {
10936 UInt rE
= eregOfRexRM(pfx
,modrm
);
10937 assign( argV
, getYMMReg(rE
) );
10939 DIP("vcvt%spd2dq %s,%s\n",
10940 r2zero
? "t" : "", nameYMMReg(rE
), nameXMMReg(rG
));
10942 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10943 assign( argV
, loadLE(Ity_V256
, mkexpr(addr
)) );
10945 DIP("vcvt%spd2dqy %s,%s\n",
10946 r2zero
? "t" : "", dis_buf
, nameXMMReg(rG
) );
10950 assign(rmode
, mkU32((UInt
)Irrm_ZERO
) );
10952 assign( rmode
, get_sse_roundingmode() );
10955 t0
= IRTemp_INVALID
;
10956 t1
= IRTemp_INVALID
;
10957 t2
= IRTemp_INVALID
;
10958 t3
= IRTemp_INVALID
;
10959 breakupV256to64s( argV
, &t3
, &t2
, &t1
, &t0
);
10961 # define CVT(_t) binop( Iop_F64toI32S, \
10963 unop( Iop_ReinterpI64asF64, \
10966 putXMMRegLane32( rG
, 3, CVT(t3
) );
10967 putXMMRegLane32( rG
, 2, CVT(t2
) );
10968 putXMMRegLane32( rG
, 1, CVT(t1
) );
10969 putXMMRegLane32( rG
, 0, CVT(t0
) );
10971 putYMMRegLane128( rG
, 1, mkV128(0) );
10977 static Long
dis_CVTDQ2PS_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10978 Long delta
, Bool isAvx
)
10980 IRTemp addr
= IRTemp_INVALID
;
10983 UChar modrm
= getUChar(delta
);
10984 IRTemp argV
= newTemp(Ity_V128
);
10985 IRTemp rmode
= newTemp(Ity_I32
);
10986 UInt rG
= gregOfRexRM(pfx
,modrm
);
10988 if (epartIsReg(modrm
)) {
10989 UInt rE
= eregOfRexRM(pfx
,modrm
);
10990 assign( argV
, getXMMReg(rE
) );
10992 DIP("%scvtdq2ps %s,%s\n",
10993 isAvx
? "v" : "", nameXMMReg(rE
), nameXMMReg(rG
));
10995 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10996 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10998 DIP("%scvtdq2ps %s,%s\n",
10999 isAvx
? "v" : "", dis_buf
, nameXMMReg(rG
) );
11002 assign( rmode
, get_sse_roundingmode() );
11003 putXMMReg(rG
, binop(Iop_I32StoF32x4
, mkexpr(rmode
), mkexpr(argV
)));
11006 putYMMRegLane128( rG
, 1, mkV128(0) );
11011 static Long
dis_CVTDQ2PS_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
11014 IRTemp addr
= IRTemp_INVALID
;
11017 UChar modrm
= getUChar(delta
);
11018 IRTemp argV
= newTemp(Ity_V256
);
11019 IRTemp rmode
= newTemp(Ity_I32
);
11020 UInt rG
= gregOfRexRM(pfx
,modrm
);
11022 if (epartIsReg(modrm
)) {
11023 UInt rE
= eregOfRexRM(pfx
,modrm
);
11024 assign( argV
, getYMMReg(rE
) );
11026 DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE
), nameYMMReg(rG
));
11028 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
11029 assign( argV
, loadLE(Ity_V256
, mkexpr(addr
)) );
11031 DIP("vcvtdq2ps %s,%s\n", dis_buf
, nameYMMReg(rG
) );
11034 assign( rmode
, get_sse_roundingmode() );
11035 putYMMReg(rG
, binop(Iop_I32StoF32x8
, mkexpr(rmode
), mkexpr(argV
)));
static Long dis_PMOVMSKB_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   UChar modrm = getUChar(delta);
   vassert(epartIsReg(modrm)); /* ensured by caller */
   UInt   rE = eregOfRexRM(pfx,modrm);
   UInt   rG = gregOfRexRM(pfx,modrm);
   IRTemp t0 = newTemp(Ity_V128);
   IRTemp t1 = newTemp(Ity_I32);
   assign(t0, getXMMReg(rE));
   assign(t1, unop(Iop_16Uto32, unop(Iop_GetMSBs8x16, mkexpr(t0))));
   putIReg32(rG, mkexpr(t1));
   DIP("%spmovmskb %s,%s\n", isAvx ? "v" : "", nameXMMReg(rE),
       nameIReg32(rG));
   delta += 1;
   return delta;
}

static Long dis_PMOVMSKB_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta  )
{
   UChar modrm = getUChar(delta);
   vassert(epartIsReg(modrm)); /* ensured by caller */
   UInt   rE = eregOfRexRM(pfx,modrm);
   UInt   rG = gregOfRexRM(pfx,modrm);
   IRTemp t0 = newTemp(Ity_V128);
   IRTemp t1 = newTemp(Ity_V128);
   IRTemp t2 = newTemp(Ity_I16);
   IRTemp t3 = newTemp(Ity_I16);
   assign(t0, getYMMRegLane128(rE, 0));
   assign(t1, getYMMRegLane128(rE, 1));
   assign(t2, unop(Iop_GetMSBs8x16, mkexpr(t0)));
   assign(t3, unop(Iop_GetMSBs8x16, mkexpr(t1)));
   putIReg32(rG, binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)));
   DIP("vpmovmskb %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
   delta += 1;
   return delta;
}

/* FIXME: why not just use InterleaveLO / InterleaveHI?  I think the
   relevant ops are "xIsH ? InterleaveHI32x4 : InterleaveLO32x4". */
/* Does the maths for 128 bit versions of UNPCKLPS and UNPCKHPS */
static IRTemp math_UNPCKxPS_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V128);
   assign(res,  xIsH ? mkV128from32s( s3, d3, s2, d2 )
                     : mkV128from32s( s1, d1, s0, d0 ));
   return res;
}
/* FIXME: why not just use InterleaveLO / InterleaveHI ?? */
/* Does the maths for 128 bit versions of UNPCKLPD and UNPCKHPD */
static IRTemp math_UNPCKxPD_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp s1 = newTemp(Ity_I64);
   IRTemp s0 = newTemp(Ity_I64);
   IRTemp d1 = newTemp(Ity_I64);
   IRTemp d0 = newTemp(Ity_I64);
   assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );
   IRTemp res = newTemp(Ity_V128);
   assign(res, xIsH ? binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1))
                    : binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)));
   return res;
}
/* Does the maths for 256 bit versions of UNPCKLPD and UNPCKHPD.
   Doesn't seem like this fits in either of the Iop_Interleave{LO,HI}
   or the Iop_Cat{Odd,Even}Lanes idioms, hence just do it the stupid
   way. */
static IRTemp math_UNPCKxPD_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
   breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V256);
   assign(res, xIsH
               ? IRExpr_Qop(Iop_64x4toV256, mkexpr(s3), mkexpr(d3),
                                            mkexpr(s1), mkexpr(d1))
               : IRExpr_Qop(Iop_64x4toV256, mkexpr(s2), mkexpr(d2),
                                            mkexpr(s0), mkexpr(d0)));
   return res;
}
/* FIXME: this is really bad.  Surely can do something better here?
   One observation is that the steering in the upper and lower 128 bit
   halves is the same as with math_UNPCKxPS_128, so we simply split
   into two halves, and use that.  Consequently any improvement in
   math_UNPCKxPS_128 (probably, to use interleave-style primops)
   benefits this too. */
static IRTemp math_UNPCKxPS_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_UNPCKxPS_128(sVhi, dVhi, xIsH);
   IRTemp rVlo = math_UNPCKxPS_128(sVlo, dVlo, xIsH);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}
static IRTemp math_SHUFPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   vassert(imm8 < 256);

   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );

#  define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
#  define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
   IRTemp res = newTemp(Ity_V128);
   assign(res,
          mkV128from32s( SELS((imm8>>6)&3), SELS((imm8>>4)&3),
                         SELD((imm8>>2)&3), SELD((imm8>>0)&3) ) );
#  undef SELD
#  undef SELS
   return res;
}
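/* Illustration (not from the original source): with imm8 = 0x1B
   (binary 00 01 10 11), the result lanes are, from high to low,
   sV lane 0, sV lane 1, dV lane 2, dV lane 3 -- bits 7:6 and 5:4 of
   imm8 pick the two high result lanes from sV, and bits 3:2 and 1:0
   pick the two low result lanes from dV. */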
/* 256-bit SHUFPS appears to steer each of the 128-bit halves
   identically.  Hence do the clueless thing and use math_SHUFPS_128
   for both halves. */
static IRTemp math_SHUFPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_SHUFPS_128(sVhi, dVhi, imm8);
   IRTemp rVlo = math_SHUFPS_128(sVlo, dVlo, imm8);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}
static IRTemp math_SHUFPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp s1 = newTemp(Ity_I64);
   IRTemp s0 = newTemp(Ity_I64);
   IRTemp d1 = newTemp(Ity_I64);
   IRTemp d0 = newTemp(Ity_I64);

   assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );

#  define SELD(n) mkexpr((n)==0 ? d0 : d1)
#  define SELS(n) mkexpr((n)==0 ? s0 : s1)

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop( Iop_64HLtoV128,
                      SELS((imm8>>1)&1), SELD((imm8>>0)&1) ) );
#  undef SELD
#  undef SELS
   return res;
}
static IRTemp math_SHUFPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_SHUFPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
   IRTemp rVlo = math_SHUFPD_128(sVlo, dVlo, imm8 & 3);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}
static IRTemp math_BLENDPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   UShort imm8_mask_16;
   IRTemp imm8_mask = newTemp(Ity_V128);

   switch( imm8 & 3 ) {
      case 0:  imm8_mask_16 = 0x0000; break;
      case 1:  imm8_mask_16 = 0x00FF; break;
      case 2:  imm8_mask_16 = 0xFF00; break;
      case 3:  imm8_mask_16 = 0xFFFF; break;
      default: vassert(0);            break;
   }
   assign( imm8_mask, mkV128( imm8_mask_16 ) );

   IRTemp res = newTemp(Ity_V128);
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm8_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
   return res;
}
static IRTemp math_BLENDPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_BLENDPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
   IRTemp rVlo = math_BLENDPD_128(sVlo, dVlo, imm8 & 3);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}
static IRTemp math_BLENDPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
                             0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
                             0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
                             0xFFFF };
   IRTemp imm8_mask = newTemp(Ity_V128);
   assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) );

   IRTemp res = newTemp(Ity_V128);
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm8_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
   return res;
}
static IRTemp math_BLENDPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_BLENDPS_128(sVhi, dVhi, (imm8 >> 4) & 15);
   IRTemp rVlo = math_BLENDPS_128(sVlo, dVlo, imm8 & 15);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}
static IRTemp math_PBLENDW_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   /* Make w be a 16-bit version of imm8, formed by duplicating each
      bit in imm8. */
   Int    i;
   UShort imm16 = 0;
   for (i = 0; i < 8; i++) {
      if (imm8 & (1 << i))
         imm16 |= (3 << (2*i));
   }
   IRTemp imm16_mask = newTemp(Ity_V128);
   assign( imm16_mask, mkV128( imm16 ));

   IRTemp res = newTemp(Ity_V128);
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm16_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) );
   return res;
}
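/* Worked example (illustrative only): imm8 = 0x05 selects 16-bit
   lanes 0 and 2, so imm16 becomes 0x0033 -- each set bit of imm8 is
   doubled into a pair of mask bits, and mkV128 then expands each
   mask bit into a whole byte of the 128-bit blend mask. */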
static IRTemp math_PMULUDQ_128 ( IRTemp sV, IRTemp dV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128,
                     binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)),
                     binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ));
   return res;
}
static IRTemp math_PMULUDQ_256 ( IRTemp sV, IRTemp dV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo );
   breakupV256toV128s( sV, &sHi, &sLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMULUDQ_128(sHi, dHi)),
                     mkexpr(math_PMULUDQ_128(sLo, dLo))));
   return res;
}
static IRTemp math_PMULDQ_128 ( IRTemp dV, IRTemp sV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128,
                     binop( Iop_MullS32, mkexpr(d2), mkexpr(s2)),
                     binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) ));
   return res;
}
static IRTemp math_PMULDQ_256 ( IRTemp sV, IRTemp dV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo );
   breakupV256toV128s( sV, &sHi, &sLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMULDQ_128(sHi, dHi)),
                     mkexpr(math_PMULDQ_128(sLo, dLo))));
   return res;
}
static IRTemp math_PMADDWD_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp sVhi, sVlo, dVhi, dVlo;
   IRTemp resHi = newTemp(Ity_I64);
   IRTemp resLo = newTemp(Ity_I64);
   sVhi = sVlo = dVhi = dVlo = IRTemp_INVALID;
   breakupV128to64s( sV, &sVhi, &sVlo );
   breakupV128to64s( dV, &dVhi, &dVlo );
   assign( resHi, mkIRExprCCall(Ity_I64, 0/*regparms*/,
                                "amd64g_calculate_mmx_pmaddwd",
                                &amd64g_calculate_mmx_pmaddwd,
                                mkIRExprVec_2( mkexpr(sVhi), mkexpr(dVhi))));
   assign( resLo, mkIRExprCCall(Ity_I64, 0/*regparms*/,
                                "amd64g_calculate_mmx_pmaddwd",
                                &amd64g_calculate_mmx_pmaddwd,
                                mkIRExprVec_2( mkexpr(sVlo), mkexpr(dVlo))));
   IRTemp res = newTemp(Ity_V128);
   assign( res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo))) ;
   return res;
}
static IRTemp math_PMADDWD_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo );
   breakupV256toV128s( sV, &sHi, &sLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMADDWD_128(dHi, sHi)),
                     mkexpr(math_PMADDWD_128(dLo, sLo))));
   return res;
}
static IRTemp math_ADDSUBPD_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp addV = newTemp(Ity_V128);
   IRTemp subV = newTemp(Ity_V128);
   IRTemp a1   = newTemp(Ity_I64);
   IRTemp s0   = newTemp(Ity_I64);
   IRTemp rm   = newTemp(Ity_I32);

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
   assign( s0, unop(Iop_V128to64,   mkexpr(subV) ));

   IRTemp res = newTemp(Ity_V128);
   assign( res, binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
   return res;
}
static IRTemp math_ADDSUBPD_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
   IRTemp addV = newTemp(Ity_V256);
   IRTemp subV = newTemp(Ity_V256);
   IRTemp rm   = newTemp(Ity_I32);
   a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   breakupV256to64s( addV, &a3, &a2, &a1, &a0 );
   breakupV256to64s( subV, &s3, &s2, &s1, &s0 );

   IRTemp res = newTemp(Ity_V256);
   assign( res, mkV256from64s( a3, s2, a1, s0 ) );
   return res;
}
static IRTemp math_ADDSUBPS_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
   IRTemp addV = newTemp(Ity_V128);
   IRTemp subV = newTemp(Ity_V128);
   IRTemp rm   = newTemp(Ity_I32);
   a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   breakupV128to32s( addV, &a3, &a2, &a1, &a0 );
   breakupV128to32s( subV, &s3, &s2, &s1, &s0 );

   IRTemp res = newTemp(Ity_V128);
   assign( res, mkV128from32s( a3, s2, a1, s0 ) );
   return res;
}
static IRTemp math_ADDSUBPS_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp a7, a6, a5, a4, a3, a2, a1, a0;
   IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
   IRTemp addV = newTemp(Ity_V256);
   IRTemp subV = newTemp(Ity_V256);
   IRTemp rm   = newTemp(Ity_I32);
   a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID;
   s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   breakupV256to32s( addV, &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0 );
   breakupV256to32s( subV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );

   IRTemp res = newTemp(Ity_V256);
   assign( res, mkV256from32s( a7, s6, a5, s4, a3, s2, a1, s0 ) );
   return res;
}
/* Handle 128 bit PSHUFLW and PSHUFHW. */
static Long dis_PSHUFxW_128 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool isAvx, Bool xIsH )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   UInt   imm8;
   IRTemp sVmut, dVmut, sVcon, sV, dV, s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   sV    = newTemp(Ity_V128);
   dV    = newTemp(Ity_V128);
   sVmut = newTemp(Ity_I64);
   dVmut = newTemp(Ity_I64);
   sVcon = newTemp(Ity_I64);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      imm8 = (UInt)getUChar(delta+1);
      delta += 1+1;
      DIP("%spshuf%cw $%u,%s,%s\n",
          isAvx ? "v" : "", xIsH ? 'h' : 'l',
          imm8, nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      imm8 = (UInt)getUChar(delta+alen);
      delta += alen+1;
      DIP("%spshuf%cw $%u,%s,%s\n",
          isAvx ? "v" : "", xIsH ? 'h' : 'l',
          imm8, dis_buf, nameXMMReg(rG));
   }

   /* Get the to-be-changed (mut) and unchanging (con) bits of the
      source. */
   assign( sVmut, unop(xIsH ? Iop_V128HIto64 : Iop_V128to64,   mkexpr(sV)) );
   assign( sVcon, unop(xIsH ? Iop_V128to64   : Iop_V128HIto64, mkexpr(sV)) );

   breakup64to16s( sVmut, &s3, &s2, &s1, &s0 );
#  define SEL(n) \
             ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
   assign(dVmut, mk64from16s( SEL((imm8>>6)&3), SEL((imm8>>4)&3),
                              SEL((imm8>>2)&3), SEL((imm8>>0)&3) ));
#  undef SEL

   assign(dV, xIsH ? binop(Iop_64HLtoV128, mkexpr(dVmut), mkexpr(sVcon))
                   : binop(Iop_64HLtoV128, mkexpr(sVcon), mkexpr(dVmut)) );

   (isAvx ? putYMMRegLoAndZU : putXMMReg)(rG, mkexpr(dV));
   return delta;
}
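/* Example (illustrative, not from the original source): for PSHUFLW
   with imm8 = 0xE4 (binary 11 10 01 00) every destination word in the
   mutable 64-bit half selects the same-numbered source word, so that
   half is copied unchanged while the other half passes through via
   sVcon. */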
/* Handle 256 bit PSHUFLW and PSHUFHW. */
static Long dis_PSHUFxW_256 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool xIsH )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   UInt   imm8;
   IRTemp sV, s[8], sV64[4], dVhi, dVlo;
   sV64[3] = sV64[2] = sV64[1] = sV64[0] = IRTemp_INVALID;
   s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
   sV   = newTemp(Ity_V256);
   dVhi = newTemp(Ity_I64);
   dVlo = newTemp(Ity_I64);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getYMMReg(rE) );
      imm8 = (UInt)getUChar(delta+1);
      delta += 1+1;
      DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
          imm8, nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
      imm8 = (UInt)getUChar(delta+alen);
      delta += alen+1;
      DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
          imm8, dis_buf, nameYMMReg(rG));
   }

   breakupV256to64s( sV, &sV64[3], &sV64[2], &sV64[1], &sV64[0] );
   breakup64to16s( sV64[xIsH ? 3 : 2], &s[7], &s[6], &s[5], &s[4] );
   breakup64to16s( sV64[xIsH ? 1 : 0], &s[3], &s[2], &s[1], &s[0] );

   assign( dVhi, mk64from16s( s[4 + ((imm8>>6)&3)], s[4 + ((imm8>>4)&3)],
                              s[4 + ((imm8>>2)&3)], s[4 + ((imm8>>0)&3)] ) );
   assign( dVlo, mk64from16s( s[0 + ((imm8>>6)&3)], s[0 + ((imm8>>4)&3)],
                              s[0 + ((imm8>>2)&3)], s[0 + ((imm8>>0)&3)] ) );
   putYMMReg( rG, mkV256from64s( xIsH ? dVhi : sV64[3],
                                 xIsH ? sV64[2] : dVhi,
                                 xIsH ? dVlo : sV64[1],
                                 xIsH ? sV64[0] : dVlo ) );
   return delta;
}
static Long dis_PEXTRW_128_EregOnly_toG ( const VexAbiInfo* vbi, Prefix pfx,
                                          Long delta, Bool isAvx )
{
   Long   deltaIN = delta;
   UChar  modrm   = getUChar(delta);
   UInt   rG      = gregOfRexRM(pfx,modrm);
   IRTemp sV      = newTemp(Ity_V128);
   IRTemp d16     = newTemp(Ity_I16);
   UInt   imm8;
   IRTemp s0, s1, s2, s3;
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign(sV, getXMMReg(rE));
      imm8 = getUChar(delta+1) & 7;
      delta += 1+1;
      DIP("%spextrw $%u,%s,%s\n", isAvx ? "v" : "",
          imm8, nameXMMReg(rE), nameIReg32(rG));
   } else {
      /* The memory case is disallowed, apparently. */
      return deltaIN; /* FAIL */
   }
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   switch (imm8) {
      case 0:  assign(d16, unop(Iop_32to16,   mkexpr(s0))); break;
      case 1:  assign(d16, unop(Iop_32HIto16, mkexpr(s0))); break;
      case 2:  assign(d16, unop(Iop_32to16,   mkexpr(s1))); break;
      case 3:  assign(d16, unop(Iop_32HIto16, mkexpr(s1))); break;
      case 4:  assign(d16, unop(Iop_32to16,   mkexpr(s2))); break;
      case 5:  assign(d16, unop(Iop_32HIto16, mkexpr(s2))); break;
      case 6:  assign(d16, unop(Iop_32to16,   mkexpr(s3))); break;
      case 7:  assign(d16, unop(Iop_32HIto16, mkexpr(s3))); break;
      default: vassert(0);
   }
   putIReg32(rG, unop(Iop_16Uto32, mkexpr(d16)));
   return delta;
}
static Long dis_CVTDQ2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp arg64 = newTemp(Ity_I64);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   const HChar* mbV = isAvx ? "v" : "";
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( arg64, getXMMRegLane64(rE, 0) );
      delta += 1;
      DIP("%scvtdq2pd %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
      delta += alen;
      DIP("%scvtdq2pd %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
   }
   putXMMRegLane64F(
      rG, 0,
      unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
   );
   putXMMRegLane64F(
      rG, 1,
      unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
   );
   if (isAvx)
      putYMMRegLane128(rG, 1, mkV128(0));
   return delta;
}
static Long dis_STMXCSR ( const VexAbiInfo* vbi, Prefix pfx,
                          Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   vassert(!epartIsReg(modrm)); /* ensured by caller */
   vassert(gregOfRexRM(pfx,modrm) == 3); /* ditto */

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;

   /* Fake up a native SSE mxcsr word.  The only thing it depends on
      is SSEROUND[1:0], so call a clean helper to cook it up.
   */
   /* ULong amd64h_create_mxcsr ( ULong sseround ) */
   DIP("%sstmxcsr %s\n", isAvx ? "v" : "", dis_buf);
   storeLE(
      mkexpr(addr),
      unop(Iop_64to32,
           mkIRExprCCall(
              Ity_I64, 0/*regp*/,
              "amd64g_create_mxcsr", &amd64g_create_mxcsr,
              mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) )
           )
      )
   );
   return delta;
}
static Long dis_LDMXCSR ( const VexAbiInfo* vbi, Prefix pfx,
                          Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   vassert(!epartIsReg(modrm)); /* ensured by caller */
   vassert(gregOfRexRM(pfx,modrm) == 2); /* ditto */

   IRTemp t64 = newTemp(Ity_I64);
   IRTemp ew  = newTemp(Ity_I32);

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;
   DIP("%sldmxcsr %s\n", isAvx ? "v" : "", dis_buf);

   /* The only thing we observe in %mxcsr is the rounding mode.
      Therefore, pass the 32-bit value (SSE native-format control
      word) to a clean helper, getting back a 64-bit value, the
      lower half of which is the SSEROUND value to store, and the
      upper half of which is the emulation-warning token which may
      be generated.
   */
   /* ULong amd64h_check_ldmxcsr ( ULong ); */
   assign( t64, mkIRExprCCall(
                   Ity_I64, 0/*regparms*/,
                   "amd64g_check_ldmxcsr",
                   &amd64g_check_ldmxcsr,
                   mkIRExprVec_1(
                      unop(Iop_32Uto64,
                           loadLE(Ity_I32, mkexpr(addr))
                      )
                   )
                )
         );

   put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
   assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
   put_emwarn( mkexpr(ew) );
   /* Finally, if an emulation warning was reported, side-exit to
      the next insn, reporting the warning, so that Valgrind's
      dispatcher sees the warning. */
   stmt( IRStmt_Exit(
            binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)),
            Ijk_EmWarn,
            IRConst_U64(guest_RIP_bbstart+delta),
            OFFB_RIP
         )
       );
   return delta;
}
static void gen_XSAVE_SEQUENCE ( IRTemp addr, IRTemp rfbm )
{
   /* ------ rfbm[0] gates the x87 state ------ */

   /* Uses dirty helper:
         void amd64g_do_XSAVE_COMPONENT_0 ( VexGuestAMD64State*, ULong )
   */
   IRDirty* d0 = unsafeIRDirty_0_N (
                    0/*regparms*/,
                    "amd64g_dirtyhelper_XSAVE_COMPONENT_0",
                    &amd64g_dirtyhelper_XSAVE_COMPONENT_0,
                    mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
                 );
   d0->guard = binop(Iop_CmpEQ64, binop(Iop_And64, mkexpr(rfbm), mkU64(1)),
                     mkU64(1));

   /* Declare we're writing memory.  Really, bytes 24 through 31
      (MXCSR and MXCSR_MASK) aren't written, but we can't express more
      than 1 memory area here, so just mark the whole thing as
      written. */
   d0->mFx   = Ifx_Write;
   d0->mAddr = mkexpr(addr);
   d0->mSize = 160;

   /* declare we're reading guest state */
   d0->nFxState = 5;
   vex_bzero(&d0->fxState, sizeof(d0->fxState));

   d0->fxState[0].fx     = Ifx_Read;
   d0->fxState[0].offset = OFFB_FTOP;
   d0->fxState[0].size   = sizeof(UInt);

   d0->fxState[1].fx     = Ifx_Read;
   d0->fxState[1].offset = OFFB_FPREGS;
   d0->fxState[1].size   = 8 * sizeof(ULong);

   d0->fxState[2].fx     = Ifx_Read;
   d0->fxState[2].offset = OFFB_FPTAGS;
   d0->fxState[2].size   = 8 * sizeof(UChar);

   d0->fxState[3].fx     = Ifx_Read;
   d0->fxState[3].offset = OFFB_FPROUND;
   d0->fxState[3].size   = sizeof(ULong);

   d0->fxState[4].fx     = Ifx_Read;
   d0->fxState[4].offset = OFFB_FC3210;
   d0->fxState[4].size   = sizeof(ULong);

   stmt( IRStmt_Dirty(d0) );

   /* ------ rfbm[1] gates the SSE state ------ */

   IRTemp rfbm_1    = newTemp(Ity_I64);
   IRTemp rfbm_1or2 = newTemp(Ity_I64);
   assign(rfbm_1,    binop(Iop_And64, mkexpr(rfbm), mkU64(2)));
   assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6)));

   IRExpr* guard_1    = binop(Iop_CmpEQ64, mkexpr(rfbm_1),    mkU64(2));
   IRExpr* guard_1or2 = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0));

   /* Uses dirty helper:
         void amd64g_do_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
                 ( VexGuestAMD64State*, ULong )
      This creates only MXCSR and MXCSR_MASK.  We need to do this if
      either components 1 (SSE) or 2 (AVX) are requested.  Hence the
      guard condition is a bit more complex.
   */
   IRDirty* d1 = unsafeIRDirty_0_N (
                    0/*regparms*/,
                    "amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS",
                    &amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS,
                    mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
                 );
   d1->guard = guard_1or2;

   /* Declare we're writing memory: MXCSR and MXCSR_MASK.  Note that
      the code for rbfm[0] just above claims a write of 0 .. 159, so
      this duplicates it.  But at least correctly connects 24 .. 31 to
      the MXCSR guest state representation (SSEROUND field). */
   d1->mFx   = Ifx_Write;
   d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24));
   d1->mSize = 8;

   /* declare we're reading guest state */
   d1->nFxState = 1;
   vex_bzero(&d1->fxState, sizeof(d1->fxState));

   d1->fxState[0].fx     = Ifx_Read;
   d1->fxState[0].offset = OFFB_SSEROUND;
   d1->fxState[0].size   = sizeof(ULong);

   /* Call the helper.  This creates MXCSR and MXCSR_MASK but nothing
      else.  We do the actual register array, XMM[0..15], separately,
      in order that any undefinedness in the XMM registers is tracked
      separately by Memcheck and does not "infect" the in-memory
      shadow for the other parts of the image. */
   stmt( IRStmt_Dirty(d1) );

   /* And now the XMMs themselves. */
   UInt reg;
   for (reg = 0; reg < 16; reg++) {
      stmt( IRStmt_StoreG(
               Iend_LE,
               binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16)),
               getXMMReg(reg), guard_1
      ));
   }

   /* ------ rfbm[2] gates the AVX state ------ */
   /* Component 2 is just a bunch of register saves, so we'll do it
      inline, just to be simple and to be Memcheck friendly. */

   IRTemp rfbm_2 = newTemp(Ity_I64);
   assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4)));

   IRExpr* guard_2 = binop(Iop_CmpEQ64, mkexpr(rfbm_2), mkU64(4));

   for (reg = 0; reg < 16; reg++) {
      stmt( IRStmt_StoreG(
               Iend_LE,
               binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16)),
               getYMMRegLane128(reg,1), guard_2
      ));
   }
}
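/* Rough sketch of the XSAVE area layout that the sequence above
   relies on (offsets in bytes; given here only as an illustrative
   summary, not a complete description of the x86 XSAVE format):

      0 .. 159      legacy x87/SSE image, written by component 0
                    (bytes 24..31 = MXCSR/MXCSR_MASK, component 1)
      160 + r*16    XMM register r, 0 <= r < 16      (component 1)
      512           XSAVE header; XSTATE_BV in its first 8 bytes
      576 + r*16    upper 128 bits of YMM register r (component 2)
*/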
static Long dis_XSAVE ( const VexAbiInfo* vbi,
                        Prefix pfx, Long delta, Int sz )
{
   /* Note that the presence or absence of REX.W (indicated here by
      |sz|) slightly affects the written format: whether the saved FPU
      IP and DP pointers are 64 or 32 bits.  But the helper function
      we call simply writes zero bits in the relevant fields, which
      are 64 bits regardless of what REX.W is, and so it's good enough
      (iow, equally broken) in both cases. */
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   vassert(!epartIsReg(modrm)); /* ensured by caller */
   vassert(sz == 4 || sz == 8); /* ditto */

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;
   gen_SIGNAL_if_not_64_aligned(vbi, addr);

   DIP("%sxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);

   /* VEX's caller is assumed to have checked this. */
   const ULong aSSUMED_XCR0_VALUE = 7;

   IRTemp rfbm = newTemp(Ity_I64);
   assign(rfbm,
          binop(Iop_And64,
                binop(Iop_Or64,
                      binop(Iop_Shl64,
                            unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)),
                      unop(Iop_32Uto64, getIRegRAX(4))),
                mkU64(aSSUMED_XCR0_VALUE)));

   gen_XSAVE_SEQUENCE(addr, rfbm);

   /* Finally, we need to update XSTATE_BV in the XSAVE header area, by
      OR-ing the RFBM value into it. */
   IRTemp addr_plus_512 = newTemp(Ity_I64);
   assign(addr_plus_512, binop(Iop_Add64, mkexpr(addr), mkU64(512)));
   storeLE( mkexpr(addr_plus_512),
            binop(Iop_Or8,
                  unop(Iop_64to8, mkexpr(rfbm)),
                  loadLE(Ity_I8, mkexpr(addr_plus_512))) );

   return delta;
}
static Long dis_FXSAVE ( const VexAbiInfo* vbi,
                         Prefix pfx, Long delta, Int sz )
{
   /* See comment in dis_XSAVE about the significance of REX.W. */
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   vassert(!epartIsReg(modrm)); /* ensured by caller */
   vassert(sz == 4 || sz == 8); /* ditto */

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;
   gen_SIGNAL_if_not_16_aligned(vbi, addr);

   DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);

   /* FXSAVE is just XSAVE with components 0 and 1 selected.  Set rfbm
      to 0b011, generate the XSAVE sequence accordingly, and let iropt
      fold out the unused (AVX) parts accordingly. */
   IRTemp rfbm = newTemp(Ity_I64);
   assign(rfbm, mkU64(3));
   gen_XSAVE_SEQUENCE(addr, rfbm);

   return delta;
}
static void gen_XRSTOR_SEQUENCE ( IRTemp addr, IRTemp xstate_bv, IRTemp rfbm )
{
   /* ------ rfbm[0] gates the x87 state ------ */

   /* If rfbm[0] == 1, we have to write the x87 state.  If
      xstate_bv[0] == 1, we will read it from the memory image, else
      we'll set it to initial values.  Doing this with a helper
      function and getting the definedness flow annotations correct is
      too difficult, so generate stupid but simple code: first set the
      registers to initial values, regardless of xstate_bv[0].  Then,
      conditionally restore from the memory image. */

   IRTemp rfbm_0      = newTemp(Ity_I64);
   IRTemp xstate_bv_0 = newTemp(Ity_I64);
   IRTemp restore_0   = newTemp(Ity_I64);
   assign(rfbm_0,      binop(Iop_And64, mkexpr(rfbm), mkU64(1)));
   assign(xstate_bv_0, binop(Iop_And64, mkexpr(xstate_bv), mkU64(1)));
   assign(restore_0,   binop(Iop_And64, mkexpr(rfbm_0), mkexpr(xstate_bv_0)));

   gen_FINIT_SEQUENCE( binop(Iop_CmpNE64, mkexpr(rfbm_0), mkU64(0)) );

   /* Uses dirty helper:
         void amd64g_do_XRSTOR_COMPONENT_0 ( VexGuestAMD64State*, ULong )
   */
   IRDirty* d0 = unsafeIRDirty_0_N (
                    0/*regparms*/,
                    "amd64g_dirtyhelper_XRSTOR_COMPONENT_0",
                    &amd64g_dirtyhelper_XRSTOR_COMPONENT_0,
                    mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
                 );
   d0->guard = binop(Iop_CmpNE64, mkexpr(restore_0), mkU64(0));

   /* Declare we're reading memory.  Really, bytes 24 through 31
      (MXCSR and MXCSR_MASK) aren't read, but we can't express more
      than 1 memory area here, so just mark the whole thing as
      read. */
   d0->mFx   = Ifx_Read;
   d0->mAddr = mkexpr(addr);
   d0->mSize = 160;

   /* declare we're writing guest state */
   d0->nFxState = 5;
   vex_bzero(&d0->fxState, sizeof(d0->fxState));

   d0->fxState[0].fx     = Ifx_Write;
   d0->fxState[0].offset = OFFB_FTOP;
   d0->fxState[0].size   = sizeof(UInt);

   d0->fxState[1].fx     = Ifx_Write;
   d0->fxState[1].offset = OFFB_FPREGS;
   d0->fxState[1].size   = 8 * sizeof(ULong);

   d0->fxState[2].fx     = Ifx_Write;
   d0->fxState[2].offset = OFFB_FPTAGS;
   d0->fxState[2].size   = 8 * sizeof(UChar);

   d0->fxState[3].fx     = Ifx_Write;
   d0->fxState[3].offset = OFFB_FPROUND;
   d0->fxState[3].size   = sizeof(ULong);

   d0->fxState[4].fx     = Ifx_Write;
   d0->fxState[4].offset = OFFB_FC3210;
   d0->fxState[4].size   = sizeof(ULong);

   stmt( IRStmt_Dirty(d0) );

   /* ------ rfbm[1] gates the SSE state ------ */

   /* Same scheme as component 0: first zero it out, and then possibly
      restore from the memory area. */
   IRTemp rfbm_1      = newTemp(Ity_I64);
   IRTemp xstate_bv_1 = newTemp(Ity_I64);
   IRTemp restore_1   = newTemp(Ity_I64);
   assign(rfbm_1,      binop(Iop_And64, mkexpr(rfbm), mkU64(2)));
   assign(xstate_bv_1, binop(Iop_And64, mkexpr(xstate_bv), mkU64(2)));
   assign(restore_1,   binop(Iop_And64, mkexpr(rfbm_1), mkexpr(xstate_bv_1)));
   IRExpr* rfbm_1e    = binop(Iop_CmpNE64, mkexpr(rfbm_1),    mkU64(0));
   IRExpr* restore_1e = binop(Iop_CmpNE64, mkexpr(restore_1), mkU64(0));

   IRTemp rfbm_1or2      = newTemp(Ity_I64);
   IRTemp xstate_bv_1or2 = newTemp(Ity_I64);
   IRTemp restore_1or2   = newTemp(Ity_I64);
   assign(rfbm_1or2,      binop(Iop_And64, mkexpr(rfbm), mkU64(6)));
   assign(xstate_bv_1or2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(6)));
   assign(restore_1or2,   binop(Iop_And64, mkexpr(rfbm_1or2),
                                           mkexpr(xstate_bv_1or2)));
   IRExpr* rfbm_1or2e    = binop(Iop_CmpNE64, mkexpr(rfbm_1or2),    mkU64(0));
   IRExpr* restore_1or2e = binop(Iop_CmpNE64, mkexpr(restore_1or2), mkU64(0));

   /* The areas in question are: SSEROUND, and the XMM register array. */
   putGuarded(OFFB_SSEROUND, rfbm_1or2e, mkU64(Irrm_NEAREST));

   UInt reg;
   for (reg = 0; reg < 16; reg++) {
      putGuarded(xmmGuestRegOffset(reg), rfbm_1e, mkV128(0));
   }

   /* And now possibly restore from MXCSR/MXCSR_MASK */
   /* Uses dirty helper:
         void amd64g_do_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
                 ( VexGuestAMD64State*, ULong )
      This restores from only MXCSR and MXCSR_MASK.  We need to do
      this if either components 1 (SSE) or 2 (AVX) are requested.
      Hence the guard condition is a bit more complex.
   */
   IRDirty* d1 = unsafeIRDirty_0_N (
                    0/*regparms*/,
                    "amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS",
                    &amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS,
                    mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
                 ) ;
   d1->guard = restore_1or2e;

   /* Declare we're reading memory: MXCSR and MXCSR_MASK.  Note that
      the code for rbfm[0] just above claims a read of 0 .. 159, so
      this duplicates it.  But at least correctly connects 24 .. 31 to
      the MXCSR guest state representation (SSEROUND field). */
   d1->mFx   = Ifx_Read;
   d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24));
   d1->mSize = 8;

   /* declare we're writing guest state */
   d1->nFxState = 1;
   vex_bzero(&d1->fxState, sizeof(d1->fxState));

   d1->fxState[0].fx     = Ifx_Write;
   d1->fxState[0].offset = OFFB_SSEROUND;
   d1->fxState[0].size   = sizeof(ULong);

   /* Call the helper.  This creates SSEROUND but nothing
      else.  We do the actual register array, XMM[0..15], separately,
      in order that any undefinedness in the XMM registers is tracked
      separately by Memcheck and is not "infected" by the in-memory
      shadow for the other parts of the image. */
   stmt( IRStmt_Dirty(d1) );

   /* And now the XMMs themselves.  For each register, we PUT either
      its old value, or the value loaded from memory.  One convenient
      way to do that is with a conditional load that has its the
      default value, the old value of the register. */
   for (reg = 0; reg < 16; reg++) {
      IRExpr* ea  = binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16));
      IRExpr* alt = getXMMReg(reg);
      IRTemp  loadedValue = newTemp(Ity_V128);
      stmt( IRStmt_LoadG(Iend_LE,
                         ILGop_IdentV128,
                         loadedValue, ea, alt, restore_1e) );
      putXMMReg(reg, mkexpr(loadedValue));
   }

   /* ------ rfbm[2] gates the AVX state ------ */
   /* Component 2 is just a bunch of register loads, so we'll do it
      inline, just to be simple and to be Memcheck friendly. */

   /* Same scheme as component 0: first zero it out, and then possibly
      restore from the memory area. */
   IRTemp rfbm_2      = newTemp(Ity_I64);
   IRTemp xstate_bv_2 = newTemp(Ity_I64);
   IRTemp restore_2   = newTemp(Ity_I64);
   assign(rfbm_2,      binop(Iop_And64, mkexpr(rfbm), mkU64(4)));
   assign(xstate_bv_2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(4)));
   assign(restore_2,   binop(Iop_And64, mkexpr(rfbm_2), mkexpr(xstate_bv_2)));

   IRExpr* rfbm_2e    = binop(Iop_CmpNE64, mkexpr(rfbm_2),    mkU64(0));
   IRExpr* restore_2e = binop(Iop_CmpNE64, mkexpr(restore_2), mkU64(0));

   for (reg = 0; reg < 16; reg++) {
      putGuarded(ymmGuestRegLane128offset(reg, 1), rfbm_2e, mkV128(0));
   }

   for (reg = 0; reg < 16; reg++) {
      IRExpr* ea  = binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16));
      IRExpr* alt = getYMMRegLane128(reg, 1);
      IRTemp  loadedValue = newTemp(Ity_V128);
      stmt( IRStmt_LoadG(Iend_LE,
                         ILGop_IdentV128,
                         loadedValue, ea, alt, restore_2e) );
      putYMMRegLane128(reg, 1, mkexpr(loadedValue));
   }
}
static Long dis_XRSTOR ( const VexAbiInfo* vbi,
                         Prefix pfx, Long delta, Int sz )
{
   /* As with XRSTOR above we ignore the value of REX.W since we're
      not bothering with the FPU DP and IP fields. */
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   vassert(!epartIsReg(modrm)); /* ensured by caller */
   vassert(sz == 4 || sz == 8); /* ditto */

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;
   gen_SIGNAL_if_not_64_aligned(vbi, addr);

   DIP("%sxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);

   /* VEX's caller is assumed to have checked this. */
   const ULong aSSUMED_XCR0_VALUE = 7;

   IRTemp rfbm = newTemp(Ity_I64);
   assign(rfbm,
          binop(Iop_And64,
                binop(Iop_Or64,
                      binop(Iop_Shl64,
                            unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)),
                      unop(Iop_32Uto64, getIRegRAX(4))),
                mkU64(aSSUMED_XCR0_VALUE)));

   IRTemp xstate_bv = newTemp(Ity_I64);
   assign(xstate_bv, loadLE(Ity_I64,
                            binop(Iop_Add64, mkexpr(addr), mkU64(512+0))));

   IRTemp xcomp_bv = newTemp(Ity_I64);
   assign(xcomp_bv, loadLE(Ity_I64,
                           binop(Iop_Add64, mkexpr(addr), mkU64(512+8))));

   IRTemp xsavehdr_23_16 = newTemp(Ity_I64);
   assign( xsavehdr_23_16,
           loadLE(Ity_I64,
                  binop(Iop_Add64, mkexpr(addr), mkU64(512+16))));

   /* We must fault if
      * xcomp_bv[63] == 1, since this simulated CPU does not support
        the compaction extension.
      * xstate_bv sets a bit outside of XCR0 (which we assume to be 7).
      * any of the xsave header bytes 23 .. 8 are nonzero.  This seems to
        imply that xcomp_bv must be zero.
      xcomp_bv is header bytes 15 .. 8 and xstate_bv is header bytes 7 .. 0
   */
   IRTemp fault_if_nonzero = newTemp(Ity_I64);
   assign(fault_if_nonzero,
          binop(Iop_Or64,
                binop(Iop_And64, mkexpr(xstate_bv), mkU64(~aSSUMED_XCR0_VALUE)),
                binop(Iop_Or64, mkexpr(xcomp_bv), mkexpr(xsavehdr_23_16))));
   stmt( IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(fault_if_nonzero), mkU64(0)),
                     Ijk_SigSEGV,
                     IRConst_U64(guest_RIP_curr_instr),
                     OFFB_RIP
   ));

   /* We are guaranteed now that both xstate_bv and rfbm are in the
      range 0 .. 7.  Generate the restore sequence proper. */
   gen_XRSTOR_SEQUENCE(addr, xstate_bv, rfbm);

   return delta;
}
static Long dis_FXRSTOR ( const VexAbiInfo* vbi,
                          Prefix pfx, Long delta, Int sz )
{
   /* As with FXSAVE above we ignore the value of REX.W since we're
      not bothering with the FPU DP and IP fields. */
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   vassert(!epartIsReg(modrm)); /* ensured by caller */
   vassert(sz == 4 || sz == 8); /* ditto */

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;
   gen_SIGNAL_if_not_16_aligned(vbi, addr);

   DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);

   /* FXRSTOR is just XRSTOR with components 0 and 1 selected and also
      as if components 0 and 1 are set as present in XSTATE_BV in the
      XSAVE header.  Set both rfbm and xstate_bv to 0b011 therefore,
      generate the XRSTOR sequence accordingly, and let iropt fold out
      the unused (AVX) parts accordingly. */
   IRTemp three = newTemp(Ity_I64);
   assign(three, mkU64(3));
   gen_XRSTOR_SEQUENCE(addr, three/*xstate_bv*/, three/*rfbm*/);

   return delta;
}
static IRTemp math_PINSRW_128 ( IRTemp v128, IRTemp u16, UInt imm8 )
{
   vassert(imm8 <= 7);

   // Create a V128 value which has the selected word in the
   // specified lane, and zeroes everywhere else.
   IRTemp tmp128    = newTemp(Ity_V128);
   IRTemp halfshift = newTemp(Ity_I64);
   assign(halfshift, binop(Iop_Shl64,
                           unop(Iop_16Uto64, mkexpr(u16)),
                           mkU8(16 * (imm8 & 3))));
   if (imm8 < 4) {
      assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
   } else {
      assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
   }

   UShort mask = ~(3 << (imm8 * 2));
   IRTemp res  = newTemp(Ity_V128);
   assign( res, binop(Iop_OrV128,
                      mkexpr(tmp128),
                      binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
   return res;
}
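/* Worked example (illustrative only): imm8 = 3 gives
   halfshift = u16 << 48, so tmp128 carries the new value in 16-bit
   lane 3 of the low 64 bits, and mask = ~(3 << 6) = 0xFF3F clears
   exactly that lane of v128 before the OR merges the two. */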
static IRTemp math_PSADBW_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp s1, s0, d1, d0;
   s1 = s0 = d1 = d0 = IRTemp_INVALID;

   breakupV128to64s( sV, &s1, &s0 );
   breakupV128to64s( dV, &d1, &d0 );

   IRTemp res = newTemp(Ity_V128);
   assign( res,
           binop(Iop_64HLtoV128,
                 mkIRExprCCall(Ity_I64, 0/*regparms*/,
                               "amd64g_calculate_mmx_psadbw",
                               &amd64g_calculate_mmx_psadbw,
                               mkIRExprVec_2( mkexpr(s1), mkexpr(d1))),
                 mkIRExprCCall(Ity_I64, 0/*regparms*/,
                               "amd64g_calculate_mmx_psadbw",
                               &amd64g_calculate_mmx_psadbw,
                               mkIRExprVec_2( mkexpr(s0), mkexpr(d0)))) );
   return res;
}
static IRTemp math_PSADBW_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo );
   breakupV256toV128s( sV, &sHi, &sLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PSADBW_128(dHi, sHi)),
                     mkexpr(math_PSADBW_128(dLo, sLo))));
   return res;
}
static Long dis_MASKMOVDQU ( const VexAbiInfo* vbi, Prefix pfx,
                             Long delta, Bool isAvx )
{
   IRTemp regD    = newTemp(Ity_V128);
   IRTemp mask    = newTemp(Ity_V128);
   IRTemp olddata = newTemp(Ity_V128);
   IRTemp newdata = newTemp(Ity_V128);
   IRTemp addr    = newTemp(Ity_I64);
   UChar  modrm   = getUChar(delta);
   UInt   rG      = gregOfRexRM(pfx,modrm);
   UInt   rE      = eregOfRexRM(pfx,modrm);

   assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
   assign( regD, getXMMReg( rG ));

   /* Unfortunately can't do the obvious thing with SarN8x16
      here since that can't be re-emitted as SSE2 code - no such
      insn. */
   assign( mask,
           binop(Iop_64HLtoV128,
                 binop(Iop_SarN8x8,
                       getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ),
                       mkU8(7) ),
                 binop(Iop_SarN8x8,
                       getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ),
                       mkU8(7) ) ));
   assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
   assign( newdata, binop(Iop_OrV128,
                          binop(Iop_AndV128,
                                mkexpr(regD),
                                mkexpr(mask) ),
                          binop(Iop_AndV128,
                                mkexpr(olddata),
                                unop(Iop_NotV128, mkexpr(mask)))) );
   storeLE( mkexpr(addr), mkexpr(newdata) );

   delta += 1;

   DIP("%smaskmovdqu %s,%s\n", isAvx ? "v" : "",
       nameXMMReg(rE), nameXMMReg(rG) );
   return delta;
}
static Long dis_MOVMSKPS_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   UChar modrm = getUChar(delta);
   UInt   rG   = gregOfRexRM(pfx,modrm);
   UInt   rE   = eregOfRexRM(pfx,modrm);
   IRTemp t0   = newTemp(Ity_I32);
   IRTemp t1   = newTemp(Ity_I32);
   IRTemp t2   = newTemp(Ity_I32);
   IRTemp t3   = newTemp(Ity_I32);
   delta += 1;
   assign( t0, binop( Iop_And32,
                      binop(Iop_Shr32, getXMMRegLane32(rE,0), mkU8(31)),
                      mkU32(1) ));
   assign( t1, binop( Iop_And32,
                      binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(30)),
                      mkU32(2) ));
   assign( t2, binop( Iop_And32,
                      binop(Iop_Shr32, getXMMRegLane32(rE,2), mkU8(29)),
                      mkU32(4) ));
   assign( t3, binop( Iop_And32,
                      binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(28)),
                      mkU32(8) ));
   putIReg32( rG, binop(Iop_Or32,
                        binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
                        binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
   DIP("%smovmskps %s,%s\n", isAvx ? "v" : "",
       nameXMMReg(rE), nameIReg32(rG));
   return delta;
}
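/* Note (descriptive, not from the original source): each lane's sign
   bit is moved to result bit i by shifting right by (31 - i) and
   masking with (1 << i); OR-ing the partial results then assembles
   the 4-bit movmskps value.  The 256-bit and PD variants below use
   the same trick on more lanes or on the high word of each F64. */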
static Long dis_MOVMSKPS_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
{
   UChar modrm = getUChar(delta);
   UInt   rG   = gregOfRexRM(pfx,modrm);
   UInt   rE   = eregOfRexRM(pfx,modrm);
   IRTemp t0   = newTemp(Ity_I32);
   IRTemp t1   = newTemp(Ity_I32);
   IRTemp t2   = newTemp(Ity_I32);
   IRTemp t3   = newTemp(Ity_I32);
   IRTemp t4   = newTemp(Ity_I32);
   IRTemp t5   = newTemp(Ity_I32);
   IRTemp t6   = newTemp(Ity_I32);
   IRTemp t7   = newTemp(Ity_I32);
   delta += 1;
   assign( t0, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,0), mkU8(31)),
                      mkU32(1) ));
   assign( t1, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(30)),
                      mkU32(2) ));
   assign( t2, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,2), mkU8(29)),
                      mkU32(4) ));
   assign( t3, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(28)),
                      mkU32(8) ));
   assign( t4, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,4), mkU8(27)),
                      mkU32(16) ));
   assign( t5, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(26)),
                      mkU32(32) ));
   assign( t6, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,6), mkU8(25)),
                      mkU32(64) ));
   assign( t7, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(24)),
                      mkU32(128) ));
   putIReg32( rG, binop(Iop_Or32,
                        binop(Iop_Or32,
                              binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
                              binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ),
                        binop(Iop_Or32,
                              binop(Iop_Or32, mkexpr(t4), mkexpr(t5)),
                              binop(Iop_Or32, mkexpr(t6), mkexpr(t7)) ) ) );
   DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
   return delta;
}
static Long dis_MOVMSKPD_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   UChar modrm = getUChar(delta);
   UInt   rG   = gregOfRexRM(pfx,modrm);
   UInt   rE   = eregOfRexRM(pfx,modrm);
   IRTemp t0   = newTemp(Ity_I32);
   IRTemp t1   = newTemp(Ity_I32);
   delta += 1;
   assign( t0, binop( Iop_And32,
                      binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(31)),
                      mkU32(1) ));
   assign( t1, binop( Iop_And32,
                      binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(30)),
                      mkU32(2) ));
   putIReg32( rG, binop(Iop_Or32, mkexpr(t0), mkexpr(t1) ) );
   DIP("%smovmskpd %s,%s\n", isAvx ? "v" : "",
       nameXMMReg(rE), nameIReg32(rG));
   return delta;
}
static Long dis_MOVMSKPD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
{
   UChar modrm = getUChar(delta);
   UInt   rG   = gregOfRexRM(pfx,modrm);
   UInt   rE   = eregOfRexRM(pfx,modrm);
   IRTemp t0   = newTemp(Ity_I32);
   IRTemp t1   = newTemp(Ity_I32);
   IRTemp t2   = newTemp(Ity_I32);
   IRTemp t3   = newTemp(Ity_I32);
   delta += 1;
   assign( t0, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(31)),
                      mkU32(1) ));
   assign( t1, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(30)),
                      mkU32(2) ));
   assign( t2, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(29)),
                      mkU32(4) ));
   assign( t3, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(28)),
                      mkU32(8) ));
   putIReg32( rG, binop(Iop_Or32,
                        binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
                        binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
   DIP("vmovmskpd %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
   return delta;
}
/* Note, this also handles SSE(1) insns. */
__attribute__((noinline))
static
Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
                        const VexArchInfo* archinfo,
                        const VexAbiInfo* vbi,
                        Prefix pfx, Int sz, Long deltaIN,
                        DisResult* dres )
{
   IRTemp addr  = IRTemp_INVALID;
   IRTemp t0    = IRTemp_INVALID;
   IRTemp t1    = IRTemp_INVALID;
   IRTemp t2    = IRTemp_INVALID;
   IRTemp t3    = IRTemp_INVALID;
   IRTemp t4    = IRTemp_INVALID;
   IRTemp t5    = IRTemp_INVALID;
   IRTemp t6    = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   *decode_OK = False;

   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
   switch (opc) {

   case 0x10:
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMReg( gregOfRexRM(pfx,modrm),
                       getXMMReg( eregOfRexRM(pfx,modrm) ));
            DIP("movupd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            putXMMReg( gregOfRexRM(pfx,modrm),
                       loadLE(Ity_V128, mkexpr(addr)) );
            DIP("movupd %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
         }
         goto decode_success;
      }
      /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
         G (lo half xmm).  If E is mem, upper half of G is zeroed out.
         If E is reg, upper half of G is unchanged. */
      if (haveF2no66noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8) ) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
                             getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
            DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
                             loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movsd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
         }
         goto decode_success;
      }
      /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
         (lo 1/4 xmm).  If E is mem, upper 3/4 of G is zeroed out. */
      if (haveF3no66noF2(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
                             getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 ));
            DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
            putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
                             loadLE(Ity_I32, mkexpr(addr)) );
            DIP("movss %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
         }
         goto decode_success;
      }
      /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMReg( gregOfRexRM(pfx,modrm),
                       getXMMReg( eregOfRexRM(pfx,modrm) ));
            DIP("movups %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            putXMMReg( gregOfRexRM(pfx,modrm),
                       loadLE(Ity_V128, mkexpr(addr)) );
            DIP("movups %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
         }
         goto decode_success;
      }
      break;

   case 0x11:
      /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
         or lo half xmm). */
      if (haveF2no66noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMRegLane64( eregOfRexRM(pfx,modrm), 0,
                             getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
            DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                 nameXMMReg(eregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr),
                     getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
            DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                 dis_buf);
            delta += alen;
         }
         goto decode_success;
      }
      /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
         or lo 1/4 xmm). */
      if (haveF3no66noF2(pfx) && sz == 4) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            /* fall through, we don't yet have a test case */
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr),
                     getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
            DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                 dis_buf);
            delta += alen;
            goto decode_success;
         }
      }
      /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMReg( eregOfRexRM(pfx,modrm),
                       getXMMReg( gregOfRexRM(pfx,modrm) ) );
            DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                  nameXMMReg(eregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                  dis_buf);
            delta += alen;
         }
         goto decode_success;
      }
      /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            /* fall through; awaiting test case */
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movups %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                  dis_buf);
            delta += alen;
            goto decode_success;
         }
      }
      break;

   case 0x12:
      /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
      /* Identical to MOVLPS ? */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            /* fall through; apparently reg-reg is not possible */
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            putXMMRegLane64( gregOfRexRM(pfx,modrm),
                             0/*lower lane*/,
                             loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movlpd %s, %s\n",
                dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
            goto decode_success;
         }
      }
      /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
      /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta += 1;
            putXMMRegLane64( gregOfRexRM(pfx,modrm),
                             0/*lower lane*/,
                             getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ));
            DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/,
                             loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movlps %s, %s\n",
                dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
         }
         goto decode_success;
      }
      break;

   case 0x13:
      /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (!epartIsReg(modrm)) {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            storeLE( mkexpr(addr),
                     getXMMRegLane64( gregOfRexRM(pfx,modrm),
                                      0/*lower lane*/ ) );
            DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
                                   dis_buf);
            goto decode_success;
         }
         /* else fall through */
      }
      /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
      /* Identical to MOVLPS ? */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (!epartIsReg(modrm)) {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            storeLE( mkexpr(addr),
                     getXMMRegLane64( gregOfRexRM(pfx,modrm),
                                      0/*lower lane*/ ) );
            DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
                                   dis_buf);
            goto decode_success;
         }
         /* else fall through */
      /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
      /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
      /* These just appear to be special cases of SHUFPS */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         Bool   hi = toBool(opc == 0x15);
         IRTemp sV = newTemp(Ity_V128);
         IRTemp dV = newTemp(Ity_V128);
         modrm = getUChar(delta);
         UInt rG = gregOfRexRM(pfx,modrm);
         assign( dV, getXMMReg(rG) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            delta += 1;
            DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
                nameXMMReg(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
                dis_buf, nameXMMReg(rG));
         }
         IRTemp res = math_UNPCKxPS_128( sV, dV, hi );
         putXMMReg( rG, mkexpr(res) );
         goto decode_success;
      }
      /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
      /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
      /* These just appear to be special cases of SHUFPD */
      if (have66noF2noF3(pfx)
          && sz == 2 /* could be 8 if rex also present */) {
         Bool   hi = toBool(opc == 0x15);
         IRTemp sV = newTemp(Ity_V128);
         IRTemp dV = newTemp(Ity_V128);
         modrm = getUChar(delta);
         UInt rG = gregOfRexRM(pfx,modrm);
         assign( dV, getXMMReg(rG) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            delta += 1;
            DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
                nameXMMReg(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
                dis_buf, nameXMMReg(rG));
         }
         IRTemp res = math_UNPCKxPD_128( sV, dV, hi );
         putXMMReg( rG, mkexpr(res) );
         goto decode_success;
      }
      /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
      /* This seems identical to MOVHPS.  This instruction encoding is
         completely crazy. */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            /* fall through; apparently reg-reg is not possible */
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
                             loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movhpd %s,%s\n", dis_buf,
                nameXMMReg( gregOfRexRM(pfx,modrm) ));
            goto decode_success;
         }
      }
      /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
      /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta += 1;
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
                             getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) );
            DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
                             loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movhps %s,%s\n", dis_buf,
                nameXMMReg( gregOfRexRM(pfx,modrm) ));
         }
         goto decode_success;
      }
      /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (!epartIsReg(modrm)) {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            storeLE( mkexpr(addr),
                     getXMMRegLane64( gregOfRexRM(pfx,modrm),
                                      1/*upper lane*/ ) );
            DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
                                  dis_buf);
            goto decode_success;
         }
         /* else fall through */
      }

      /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
      /* Again, this seems identical to MOVHPS. */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (!epartIsReg(modrm)) {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            storeLE( mkexpr(addr),
                     getXMMRegLane64( gregOfRexRM(pfx,modrm),
                                      1/*upper lane*/ ) );
            DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
                                  dis_buf);
            goto decode_success;
         }
         /* else fall through */
      }
      /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
      /* 0F 18 /1 = PREFETCH0   -- with various different hints */
      /* 0F 18 /2 = PREFETCH1 */
      /* 0F 18 /3 = PREFETCH2 */
      if (haveNo66noF2noF3(pfx)
          && !epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) >= 0
          && gregLO3ofRM(getUChar(delta)) <= 3) {
         const HChar* hintstr = "??";

         modrm = getUChar(delta);
         vassert(!epartIsReg(modrm));

         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;

         switch (gregLO3ofRM(modrm)) {
            case 0: hintstr = "nta"; break;
            case 1: hintstr = "t0"; break;
            case 2: hintstr = "t1"; break;
            case 3: hintstr = "t2"; break;
            default: vassert(0);
         }

         DIP("prefetch%s %s\n", hintstr, dis_buf);
         goto decode_success;
      }
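      /* Note on the prefetch handling above: the amode is decoded only to
         advance delta and to fill in dis_buf for the DIP text; no IR
         statement is generated, so the prefetch hints are effectively
         no-ops as far as guest state is concerned. */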
      /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMReg( gregOfRexRM(pfx,modrm),
                       getXMMReg( eregOfRexRM(pfx,modrm) ));
            DIP("movapd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SIGNAL_if_not_16_aligned( vbi, addr );
            putXMMReg( gregOfRexRM(pfx,modrm),
                       loadLE(Ity_V128, mkexpr(addr)) );
            DIP("movapd %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
         }
         goto decode_success;
      }

      /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMReg( gregOfRexRM(pfx,modrm),
                       getXMMReg( eregOfRexRM(pfx,modrm) ));
            DIP("movaps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SIGNAL_if_not_16_aligned( vbi, addr );
            putXMMReg( gregOfRexRM(pfx,modrm),
                       loadLE(Ity_V128, mkexpr(addr)) );
            DIP("movaps %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
         }
         goto decode_success;
      }
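      /* For the aligned forms (MOVAPS/MOVAPD here, and MOVNTPS/MOVNTPD and
         MOVDQA further down), gen_SIGNAL_if_not_16_aligned adds an explicit
         16-byte alignment check on the effective address, presumably so
         that misaligned accesses fault as they would on real hardware; the
         unaligned forms (MOVUPS, MOVDQU) deliberately omit the check. */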
      /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMReg( eregOfRexRM(pfx,modrm),
                       getXMMReg( gregOfRexRM(pfx,modrm) ));
            DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                  nameXMMReg(eregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SIGNAL_if_not_16_aligned( vbi, addr );
            storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                  dis_buf );
            delta += alen;
         }
         goto decode_success;
      }

      /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMReg( eregOfRexRM(pfx,modrm),
                       getXMMReg( gregOfRexRM(pfx,modrm) ) );
            DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                  nameXMMReg(eregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SIGNAL_if_not_16_aligned( vbi, addr );
            storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                  dis_buf );
            delta += alen;
         }
         goto decode_success;
      }
      /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
         half xmm */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         IRTemp arg64 = newTemp(Ity_I64);
         IRTemp rmode = newTemp(Ity_I32);

         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            /* Only switch to MMX mode if the source is a MMX register.
               See comments on CVTPI2PD for details.  Fixes #357059. */
            do_MMX_preamble();
            assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
            delta += 1;
            DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("cvtpi2ps %s,%s\n", dis_buf,
                                    nameXMMReg(gregOfRexRM(pfx,modrm)) );
         }

         assign( rmode, get_sse_roundingmode() );

         putXMMRegLane32F(
            gregOfRexRM(pfx,modrm), 0,
            binop(Iop_F64toF32,
                  mkexpr(rmode),
                  unop(Iop_I32StoF64,
                       unop(Iop_64to32, mkexpr(arg64)) )) );

         putXMMRegLane32F(
            gregOfRexRM(pfx,modrm), 1,
            binop(Iop_F64toF32,
                  mkexpr(rmode),
                  unop(Iop_I32StoF64,
                       unop(Iop_64HIto32, mkexpr(arg64)) )) );

         goto decode_success;
      }
      /* F3 0F 2A = CVTSI2SS
         -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm
         -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */
      if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
         IRTemp rmode = newTemp(Ity_I32);
         assign( rmode, get_sse_roundingmode() );
         modrm = getUChar(delta);
         if (sz == 4) {
            IRTemp arg32 = newTemp(Ity_I32);
            if (epartIsReg(modrm)) {
               assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
               delta += 1;
               DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
                                       nameXMMReg(gregOfRexRM(pfx,modrm)));
            } else {
               addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
               assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
               delta += alen;
               DIP("cvtsi2ss %s,%s\n", dis_buf,
                                       nameXMMReg(gregOfRexRM(pfx,modrm)) );
            }
            putXMMRegLane32F(
               gregOfRexRM(pfx,modrm), 0,
               binop(Iop_F64toF32,
                     mkexpr(rmode),
                     unop(Iop_I32StoF64, mkexpr(arg32)) ) );
         } else {
            /* sz == 8 */
            IRTemp arg64 = newTemp(Ity_I64);
            if (epartIsReg(modrm)) {
               assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
               delta += 1;
               DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
                                        nameXMMReg(gregOfRexRM(pfx,modrm)));
            } else {
               addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
               assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
               delta += alen;
               DIP("cvtsi2ssq %s,%s\n", dis_buf,
                                        nameXMMReg(gregOfRexRM(pfx,modrm)) );
            }
            putXMMRegLane32F(
               gregOfRexRM(pfx,modrm), 0,
               binop(Iop_F64toF32,
                     mkexpr(rmode),
                     binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) );
         }
         goto decode_success;
      }
      /* F2 0F 2A = CVTSI2SD
         when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm
         when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm
      */
      if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
         modrm = getUChar(delta);
         if (sz == 4) {
            IRTemp arg32 = newTemp(Ity_I32);
            if (epartIsReg(modrm)) {
               assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
               delta += 1;
               DIP("cvtsi2sdl %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
                                        nameXMMReg(gregOfRexRM(pfx,modrm)));
            } else {
               addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
               assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
               delta += alen;
               DIP("cvtsi2sdl %s,%s\n", dis_buf,
                                        nameXMMReg(gregOfRexRM(pfx,modrm)) );
            }
            putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
                              unop(Iop_I32StoF64, mkexpr(arg32)) );
         } else {
            /* sz == 8 */
            IRTemp arg64 = newTemp(Ity_I64);
            if (epartIsReg(modrm)) {
               assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
               delta += 1;
               DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
                                        nameXMMReg(gregOfRexRM(pfx,modrm)));
            } else {
               addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
               assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
               delta += alen;
               DIP("cvtsi2sdq %s,%s\n", dis_buf,
                                        nameXMMReg(gregOfRexRM(pfx,modrm)) );
            }
            putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
                              binop( Iop_I64StoF64,
                                     get_sse_roundingmode(),
                                     mkexpr(arg64) ) );
         }
         goto decode_success;
      }
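      /* On the widths used above: any I32 is exactly representable as an
         F64, so the 32-bit CVTSI2SD path needs no rounding mode and uses a
         plain Iop_I32StoF64.  An I64 may not be exactly representable, so
         the 64-bit path performs a rounded conversion using the current
         SSE rounding mode; both CVTSI2SS paths likewise take the rounding
         mode, since F32 has even less precision. */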
      /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
         xmm(G) */
      if (have66noF2noF3(pfx) && sz == 2) {
         IRTemp arg64 = newTemp(Ity_I64);

         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            /* Only switch to MMX mode if the source is a MMX register.
               This is inconsistent with all other instructions which
               convert between XMM and (M64 or MMX), which always switch
               to MMX mode even if 64-bit operand is M64 and not MMX.  At
               least, that's what the Intel docs seem to me to say. */
            do_MMX_preamble();
            assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
            delta += 1;
            DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("cvtpi2pd %s,%s\n", dis_buf,
                                    nameXMMReg(gregOfRexRM(pfx,modrm)) );
         }

         putXMMRegLane64F(
            gregOfRexRM(pfx,modrm), 0,
            unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) ) );

         putXMMRegLane64F(
            gregOfRexRM(pfx,modrm), 1,
            unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) ) );

         goto decode_success;
      }
      /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
      /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
      if ( (haveNo66noF2noF3(pfx) && sz == 4)
           || (have66noF2noF3(pfx) && sz == 2) ) {
         modrm = getUChar(delta);
         if (!epartIsReg(modrm)) {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SIGNAL_if_not_16_aligned( vbi, addr );
            storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
                                    dis_buf,
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
            goto decode_success;
         }
         /* else fall through */
      }
      /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
         I32 in mmx, according to prevailing SSE rounding mode */
      /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
         I32 in mmx, rounding towards zero */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         IRTemp dst64  = newTemp(Ity_I64);
         IRTemp rmode  = newTemp(Ity_I32);
         IRTemp f32lo  = newTemp(Ity_F32);
         IRTemp f32hi  = newTemp(Ity_F32);
         Bool   r2zero = toBool(opc == 0x2C);

         do_MMX_preamble();
         modrm = getUChar(delta);

         if (epartIsReg(modrm)) {
            delta += 1;
            assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
            assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1));
            DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
                                      nameXMMReg(eregOfRexRM(pfx,modrm)),
                                      nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
            assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64,
                                                 mkexpr(addr),
                                                 mkU64(4) )));
            delta += alen;
            DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
                                      dis_buf,
                                      nameMMXReg(gregLO3ofRM(modrm)));
         }

         if (r2zero) {
            assign(rmode, mkU32((UInt)Irrm_ZERO) );
         } else {
            assign( rmode, get_sse_roundingmode() );
         }

         assign(
            dst64,
            binop( Iop_32HLto64,
                   binop( Iop_F64toI32S,
                          mkexpr(rmode),
                          unop( Iop_F32toF64, mkexpr(f32hi) ) ),
                   binop( Iop_F64toI32S,
                          mkexpr(rmode),
                          unop( Iop_F32toF64, mkexpr(f32lo) ) )
            )
         );

         putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
         goto decode_success;
      }
      /* F3 0F 2D = CVTSS2SI
         when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
                       according to prevailing SSE rounding mode
         when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
                       according to prevailing SSE rounding mode
      */
      /* F3 0F 2C = CVTTSS2SI
         when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
                       truncating towards zero
         when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
                       truncating towards zero
      */
      if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
         delta = dis_CVTxSS2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
         goto decode_success;
      }

      /* F2 0F 2D = CVTSD2SI
         when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
                       according to prevailing SSE rounding mode
         when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
                       according to prevailing SSE rounding mode
      */
      /* F2 0F 2C = CVTTSD2SI
         when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
                       truncating towards zero
         when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
                       truncating towards zero
      */
      if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
         delta = dis_CVTxSD2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
         goto decode_success;
      }
      /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
         I32 in mmx, according to prevailing SSE rounding mode */
      /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
         I32 in mmx, rounding towards zero */
      if (have66noF2noF3(pfx) && sz == 2) {
         IRTemp dst64  = newTemp(Ity_I64);
         IRTemp rmode  = newTemp(Ity_I32);
         IRTemp f64lo  = newTemp(Ity_F64);
         IRTemp f64hi  = newTemp(Ity_F64);
         Bool   r2zero = toBool(opc == 0x2C);

         do_MMX_preamble();
         modrm = getUChar(delta);

         if (epartIsReg(modrm)) {
            delta += 1;
            assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
            assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1));
            DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
                                      nameXMMReg(eregOfRexRM(pfx,modrm)),
                                      nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
            assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64,
                                                 mkexpr(addr),
                                                 mkU64(8) )));
            delta += alen;
            DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
                                      dis_buf,
                                      nameMMXReg(gregLO3ofRM(modrm)));
         }

         if (r2zero) {
            assign(rmode, mkU32((UInt)Irrm_ZERO) );
         } else {
            assign( rmode, get_sse_roundingmode() );
         }

         assign(
            dst64,
            binop( Iop_32HLto64,
                   binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
                   binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
            )
         );

         putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
         goto decode_success;
      }
      /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
      /* 66 0F 2F = COMISD  -- 64F0x2 comparison G,E, and set ZCP */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_COMISD( vbi, pfx, delta, False/*!isAvx*/, opc );
         goto decode_success;
      }

      /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
      /* 0F 2F = COMISS  -- 32F0x4 comparison G,E, and set ZCP */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_COMISS( vbi, pfx, delta, False/*!isAvx*/, opc );
         goto decode_success;
      }
      /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
         to 4 lowest bits of ireg(G) */
      if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
          && epartIsReg(getUChar(delta))) {
         /* sz == 8 is a kludge to handle insns with REX.W redundantly
            set to 1, which has been known to happen:

            4c 0f 50 d9             rex64X movmskps %xmm1,%r11d

            20071106: Intel docs say that REX.W isn't redundant: when
            present, a 64-bit register is written; when not present, only
            the 32-bit half is written.  However, testing on a Core2
            machine suggests the entire 64 bit register is written
            irrespective of the status of REX.W.  That could be because
            of the default rule that says "if the lower 32-bit half of a
            64-bit register is written, the upper half is zeroed".  By
            using putIReg32 here we inadvertently produce the same
            behaviour as the Core2, for the same reason -- putIReg32
            implements said rule.

            AMD docs give no indication that REX.W is even valid for this
            insn. */
         delta = dis_MOVMSKPS_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }

      /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
         2 lowest bits of ireg(G) */
      if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
         /* sz == 8 is a kludge to handle insns with REX.W redundantly
            set to 1, which has been known to happen:
            66 4c 0f 50 d9          rex64X movmskpd %xmm1,%r11d
            20071106: see further comments on MOVMSKPS implementation
            above. */
         delta = dis_MOVMSKPD_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
                                            "sqrtss", Iop_Sqrt32F0x4 );
         goto decode_success;
      }
      /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
                                           "sqrtps", Iop_Sqrt32Fx4 );
         goto decode_success;
      }
      /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
      if (haveF2no66noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta,
                                            "sqrtsd", Iop_Sqrt64F0x2 );
         goto decode_success;
      }
      /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
                                           "sqrtpd", Iop_Sqrt64Fx2 );
         goto decode_success;
      }

      /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
                                            "rsqrtss", Iop_RSqrtEst32F0x4 );
         goto decode_success;
      }
      /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
                                           "rsqrtps", Iop_RSqrtEst32Fx4 );
         goto decode_success;
      }

      /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
                                            "rcpss", Iop_RecipEst32F0x4 );
         goto decode_success;
      }
      /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
                                           "rcpps", Iop_RecipEst32Fx4 );
         goto decode_success;
      }
      /* 0F 54 = ANDPS -- G = G and E */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andps", Iop_AndV128 );
         goto decode_success;
      }
      /* 66 0F 54 = ANDPD -- G = G and E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andpd", Iop_AndV128 );
         goto decode_success;
      }

      /* 0F 55 = ANDNPS -- G = (not G) and E */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnps",
                                          Iop_AndV128 );
         goto decode_success;
      }
      /* 66 0F 55 = ANDNPD -- G = (not G) and E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnpd",
                                          Iop_AndV128 );
         goto decode_success;
      }

      /* 0F 56 = ORPS -- G = G or E */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orps", Iop_OrV128 );
         goto decode_success;
      }
      /* 66 0F 56 = ORPD -- G = G or E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orpd", Iop_OrV128 );
         goto decode_success;
      }

      /* 66 0F 57 = XORPD -- G = G xor E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorpd", Iop_XorV128 );
         goto decode_success;
      }
      /* 0F 57 = XORPS -- G = G xor E */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorps", Iop_XorV128 );
         goto decode_success;
      }
      /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addps", Iop_Add32Fx4 );
         goto decode_success;
      }
      /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "addss", Iop_Add32F0x4 );
         goto decode_success;
      }
      /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
      if (haveF2no66noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "addsd", Iop_Add64F0x2 );
         goto decode_success;
      }
      /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addpd", Iop_Add64Fx2 );
         goto decode_success;
      }

      /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
      if (haveF2no66noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "mulsd", Iop_Mul64F0x2 );
         goto decode_success;
      }
      /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "mulss", Iop_Mul32F0x4 );
         goto decode_success;
      }
      /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulps", Iop_Mul32Fx4 );
         goto decode_success;
      }
      /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulpd", Iop_Mul64Fx2 );
         goto decode_success;
      }
      /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
         F64 in xmm(G). */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_CVTPS2PD_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }

      /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
         low half xmm(G). */
      if (haveF3no66noF2(pfx) && sz == 4) {
         IRTemp f32lo = newTemp(Ity_F32);

         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta += 1;
            assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
            DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
            delta += alen;
            DIP("cvtss2sd %s,%s\n", dis_buf,
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         }

         putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
                           unop( Iop_F32toF64, mkexpr(f32lo) ) );

         goto decode_success;
      }

      /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
         low 1/4 xmm(G), according to prevailing SSE rounding mode */
      if (haveF2no66noF3(pfx) && sz == 4) {
         IRTemp rmode = newTemp(Ity_I32);
         IRTemp f64lo = newTemp(Ity_F64);

         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta += 1;
            assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
            DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
            delta += alen;
            DIP("cvtsd2ss %s,%s\n", dis_buf,
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         }

         assign( rmode, get_sse_roundingmode() );
         putXMMRegLane32F(
            gregOfRexRM(pfx,modrm), 0,
            binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
         );

         goto decode_success;
      }

      /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
         lo half xmm(G), rounding according to prevailing SSE rounding
         mode, and zero upper half */
      /* Note, this is practically identical to CVTPD2DQ.  It would have
         been nice to merge them together. */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_CVTPD2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
         xmm(G), rounding towards zero */
      /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
         xmm(G), as per the prevailing rounding mode */
      if ( (have66noF2noF3(pfx) && sz == 2)
           || (haveF3no66noF2(pfx) && sz == 4) ) {
         Bool r2zero = toBool(sz == 4); // FIXME -- unreliable (???)
         delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, False/*!isAvx*/, r2zero );
         goto decode_success;
      }
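      /* The r2zero choice above leans on the guard just before it: the
         truncating form (F3-prefixed CVTTPS2DQ) is only accepted here with
         sz == 4, and the 66-prefixed CVTPS2DQ only with sz == 2, so sz
         effectively records which prefix matched. */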
      /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
         xmm(G) */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_CVTDQ2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "subss", Iop_Sub32F0x4 );
         goto decode_success;
      }
      /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
      if (haveF2no66noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "subsd", Iop_Sub64F0x2 );
         goto decode_success;
      }
      /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subps", Iop_Sub32Fx4 );
         goto decode_success;
      }
      /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subpd", Iop_Sub64Fx2 );
         goto decode_success;
      }

      /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minps", Iop_Min32Fx4 );
         goto decode_success;
      }
      /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "minss", Iop_Min32F0x4 );
         goto decode_success;
      }
      /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
      if (haveF2no66noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "minsd", Iop_Min64F0x2 );
         goto decode_success;
      }
      /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minpd", Iop_Min64Fx2 );
         goto decode_success;
      }
      /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
      if (haveF2no66noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "divsd", Iop_Div64F0x2 );
         goto decode_success;
      }
      /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divps", Iop_Div32Fx4 );
         goto decode_success;
      }
      /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "divss", Iop_Div32F0x4 );
         goto decode_success;
      }
      /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divpd", Iop_Div64Fx2 );
         goto decode_success;
      }

      /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxps", Iop_Max32Fx4 );
         goto decode_success;
      }
      /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "maxss", Iop_Max32F0x4 );
         goto decode_success;
      }
      /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
      if (haveF2no66noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "maxsd", Iop_Max64F0x2 );
         goto decode_success;
      }
      /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxpd", Iop_Max64Fx2 );
         goto decode_success;
      }
      /* 66 0F 60 = PUNPCKLBW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "punpcklbw",
                                    Iop_InterleaveLO8x16, True );
         goto decode_success;
      }

      /* 66 0F 61 = PUNPCKLWD */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "punpcklwd",
                                    Iop_InterleaveLO16x8, True );
         goto decode_success;
      }

      /* 66 0F 62 = PUNPCKLDQ */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "punpckldq",
                                    Iop_InterleaveLO32x4, True );
         goto decode_success;
      }

      /* 66 0F 63 = PACKSSWB */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "packsswb",
                                    Iop_QNarrowBin16Sto8Sx16, True );
         goto decode_success;
      }

      /* 66 0F 64 = PCMPGTB */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pcmpgtb", Iop_CmpGT8Sx16, False );
         goto decode_success;
      }

      /* 66 0F 65 = PCMPGTW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pcmpgtw", Iop_CmpGT16Sx8, False );
         goto decode_success;
      }

      /* 66 0F 66 = PCMPGTD */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pcmpgtd", Iop_CmpGT32Sx4, False );
         goto decode_success;
      }

      /* 66 0F 67 = PACKUSWB */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "packuswb",
                                    Iop_QNarrowBin16Sto8Ux16, True );
         goto decode_success;
      }

      /* 66 0F 68 = PUNPCKHBW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "punpckhbw",
                                    Iop_InterleaveHI8x16, True );
         goto decode_success;
      }

      /* 66 0F 69 = PUNPCKHWD */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "punpckhwd",
                                    Iop_InterleaveHI16x8, True );
         goto decode_success;
      }

      /* 66 0F 6A = PUNPCKHDQ */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "punpckhdq",
                                    Iop_InterleaveHI32x4, True );
         goto decode_success;
      }

      /* 66 0F 6B = PACKSSDW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "packssdw",
                                    Iop_QNarrowBin32Sto16Sx8, True );
         goto decode_success;
      }

      /* 66 0F 6C = PUNPCKLQDQ */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "punpcklqdq",
                                    Iop_InterleaveLO64x2, True );
         goto decode_success;
      }

      /* 66 0F 6D = PUNPCKHQDQ */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "punpckhqdq",
                                    Iop_InterleaveHI64x2, True );
         goto decode_success;
      }
      /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4,
                    zeroing high 3/4 of xmm. */
      /*       or from ireg64/m64 to xmm lo 1/2,
                    zeroing high 1/2 of xmm. */
      if (have66noF2noF3(pfx)) {
         vassert(sz == 2 || sz == 8);
         if (sz == 2) sz = 4;
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta += 1;
            if (sz == 4) {
               putXMMReg(
                  gregOfRexRM(pfx,modrm),
                  unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
               );
               DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
            } else {
               putXMMReg(
                  gregOfRexRM(pfx,modrm),
                  unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
               );
               DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
            }
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            putXMMReg(
               gregOfRexRM(pfx,modrm),
               sz == 4
                  ?  unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
                  :  unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) )
            );
            DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf,
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
         }
         goto decode_success;
      }
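      /* The "if (sz == 2) sz = 4" fix-up above (and the similar one in the
         66 0F 7E case below) exists because the mandatory 66 prefix forces
         the computed operand size to 2, whereas the data actually moved is
         32 bits, or 64 bits when REX.W has promoted sz to 8. */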
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMReg( gregOfRexRM(pfx,modrm),
                       getXMMReg( eregOfRexRM(pfx,modrm) ));
            DIP("movdqa %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SIGNAL_if_not_16_aligned( vbi, addr );
            putXMMReg( gregOfRexRM(pfx,modrm),
                       loadLE(Ity_V128, mkexpr(addr)) );
            DIP("movdqa %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
         }
         goto decode_success;
      }

      if (haveF3no66noF2(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMReg( gregOfRexRM(pfx,modrm),
                       getXMMReg( eregOfRexRM(pfx,modrm) ));
            DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            putXMMReg( gregOfRexRM(pfx,modrm),
                       loadLE(Ity_V128, mkexpr(addr)) );
            DIP("movdqu %s,%s\n", dis_buf,
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
         }
         goto decode_success;
      }
      /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PSHUFD_32x4( vbi, pfx, delta, False/*!writesYmm*/);
         goto decode_success;
      }

      /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
      /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         Int order;
         IRTemp sV, dV, s3, s2, s1, s0;
         s3 = s2 = s1 = s0 = IRTemp_INVALID;
         sV = newTemp(Ity_I64);
         dV = newTemp(Ity_I64);
         do_MMX_preamble();
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
            order = (Int)getUChar(delta+1);
            delta += 1+1;
            DIP("pshufw $%d,%s,%s\n", order,
                                      nameMMXReg(eregLO3ofRM(modrm)),
                                      nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
                              1/*extra byte after amode*/ );
            assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
            order = (Int)getUChar(delta+alen);
            delta += 1+alen;
            DIP("pshufw $%d,%s,%s\n", order,
                                      dis_buf,
                                      nameMMXReg(gregLO3ofRM(modrm)));
         }
         breakup64to16s( sV, &s3, &s2, &s1, &s0 );
#        define SEL(n) \
                   ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
         assign(dV,
                mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
                             SEL((order>>2)&3), SEL((order>>0)&3) )
         );
         putMMXReg(gregLO3ofRM(modrm), mkexpr(dV));
#        undef SEL
         goto decode_success;
      }
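      /* Worked example for the order byte above: each 2-bit field selects
         the source lane for one destination lane, lane 0 coming from bits
         1:0 up to lane 3 from bits 7:6.  So order 0xE4 (binary 11 10 01 00)
         reproduces the source unchanged, while 0x1B (binary 00 01 10 11)
         reverses the four 16-bit lanes. */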
      /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
         mem) to G(xmm), and copy upper half */
      if (haveF2no66noF3(pfx) && sz == 4) {
         delta = dis_PSHUFxW_128( vbi, pfx, delta,
                                  False/*!isAvx*/, False/*!xIsH*/ );
         goto decode_success;
      }
      /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
         mem) to G(xmm), and copy lower half */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_PSHUFxW_128( vbi, pfx, delta,
                                  False/*!isAvx*/, True/*xIsH*/ );
         goto decode_success;
      }
      /* 66 0F 71 /2 ib = PSRLW by immediate */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 2) {
         delta = dis_SSE_shiftE_imm( pfx, delta, "psrlw", Iop_ShrN16x8 );
         goto decode_success;
      }
      /* 66 0F 71 /4 ib = PSRAW by immediate */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 4) {
         delta = dis_SSE_shiftE_imm( pfx, delta, "psraw", Iop_SarN16x8 );
         goto decode_success;
      }
      /* 66 0F 71 /6 ib = PSLLW by immediate */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 6) {
         delta = dis_SSE_shiftE_imm( pfx, delta, "psllw", Iop_ShlN16x8 );
         goto decode_success;
      }

      /* 66 0F 72 /2 ib = PSRLD by immediate */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 2) {
         delta = dis_SSE_shiftE_imm( pfx, delta, "psrld", Iop_ShrN32x4 );
         goto decode_success;
      }
      /* 66 0F 72 /4 ib = PSRAD by immediate */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 4) {
         delta = dis_SSE_shiftE_imm( pfx, delta, "psrad", Iop_SarN32x4 );
         goto decode_success;
      }
      /* 66 0F 72 /6 ib = PSLLD by immediate */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 6) {
         delta = dis_SSE_shiftE_imm( pfx, delta, "pslld", Iop_ShlN32x4 );
         goto decode_success;
      }
      /* 66 0F 73 /3 ib = PSRLDQ by immediate */
      /* note, if mem case ever filled in, 1 byte after amode */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 3) {
         Int imm = (Int)getUChar(delta+1);
         Int reg = eregOfRexRM(pfx,getUChar(delta));
         DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
         delta += 2;
         IRTemp sV = newTemp(Ity_V128);
         assign( sV, getXMMReg(reg) );
         putXMMReg(reg, mkexpr(math_PSRLDQ( sV, imm )));
         goto decode_success;
      }
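      /* PSRLDQ/PSLLDQ immediates are byte counts: the whole 128-bit value
         is shifted right (or, below, left) by imm bytes, and any count of
         16 or more should yield all zeroes; the math_PSRLDQ / math_PSLLDQ
         helpers are assumed to handle that saturation. */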
      /* 66 0F 73 /7 ib = PSLLDQ by immediate */
      /* note, if mem case ever filled in, 1 byte after amode */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 7) {
         Int imm = (Int)getUChar(delta+1);
         Int reg = eregOfRexRM(pfx,getUChar(delta));
         DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
         vassert(imm >= 0 && imm <= 255);
         delta += 2;
         IRTemp sV = newTemp(Ity_V128);
         assign( sV, getXMMReg(reg) );
         putXMMReg(reg, mkexpr(math_PSLLDQ( sV, imm )));
         goto decode_success;
      }

      /* 66 0F 73 /2 ib = PSRLQ by immediate */
      if (have66noF2noF3(pfx) && sz == 2
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 2) {
         delta = dis_SSE_shiftE_imm( pfx, delta, "psrlq", Iop_ShrN64x2 );
         goto decode_success;
      }
      /* 66 0F 73 /6 ib = PSLLQ by immediate */
      if (have66noF2noF3(pfx) && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
          && epartIsReg(getUChar(delta))
          && gregLO3ofRM(getUChar(delta)) == 6) {
         delta = dis_SSE_shiftE_imm( pfx, delta, "psllq", Iop_ShlN64x2 );
         goto decode_success;
      }
      /* 66 0F 74 = PCMPEQB */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pcmpeqb", Iop_CmpEQ8x16, False );
         goto decode_success;
      }

      /* 66 0F 75 = PCMPEQW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pcmpeqw", Iop_CmpEQ16x8, False );
         goto decode_success;
      }

      /* 66 0F 76 = PCMPEQD */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pcmpeqd", Iop_CmpEQ32x4, False );
         goto decode_success;
      }
      /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
         G (lo half xmm).  Upper half of G is zeroed out. */
      if (haveF3no66noF2(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
                             getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
            /* zero bits 127:64 */
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) );
            DIP("movq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
            putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
                             loadLE(Ity_I64, mkexpr(addr)) );
            DIP("movq %s,%s\n", dis_buf,
                                nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
         }
         goto decode_success;
      }
      /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */
      /*              or from xmm low 1/2 to ireg64 or m64. */
      if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
         if (sz == 2) sz = 4;
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta += 1;
            if (sz == 4) {
               putIReg32( eregOfRexRM(pfx,modrm),
                          getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
               DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                    nameIReg32(eregOfRexRM(pfx,modrm)));
            } else {
               putIReg64( eregOfRexRM(pfx,modrm),
                          getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
               DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                    nameIReg64(eregOfRexRM(pfx,modrm)));
            }
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            storeLE( mkexpr(addr),
                     sz == 4
                        ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0)
                        : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) );
            DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q',
                                  nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
         }
         goto decode_success;
      }
      /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
      if (haveF3no66noF2(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            goto decode_failure; /* awaiting test case */
            delta += 1;
            putXMMReg( eregOfRexRM(pfx,modrm),
                       getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                   nameXMMReg(eregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
         }
         goto decode_success;
      }

      /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
      if (have66noF2noF3(pfx) && sz == 2) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta += 1;
            putXMMReg( eregOfRexRM(pfx,modrm),
                       getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                   nameXMMReg(eregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SIGNAL_if_not_16_aligned( vbi, addr );
            delta += alen;
            storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
         }
         goto decode_success;
      }
      /* 0F AE /7 = SFENCE -- flush pending operations to memory */
      if (haveNo66noF2noF3(pfx)
          && epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
          && sz == 4) {
         delta += 1;
         /* Insert a memory fence.  It's sometimes important that these
            are carried through to the generated code. */
         stmt( IRStmt_MBE(Imbe_Fence) );
         DIP("sfence\n");
         goto decode_success;
      }
      /* mindless duplication follows .. */
      /* 0F AE /5 = LFENCE -- flush pending operations to memory */
      /* 0F AE /6 = MFENCE -- flush pending operations to memory */
      if (haveNo66noF2noF3(pfx)
          && epartIsReg(getUChar(delta))
          && (gregLO3ofRM(getUChar(delta)) == 5
              || gregLO3ofRM(getUChar(delta)) == 6)
          && sz == 4) {
         delta += 1;
         /* Insert a memory fence.  It's sometimes important that these
            are carried through to the generated code. */
         stmt( IRStmt_MBE(Imbe_Fence) );
         DIP("%sfence\n", gregLO3ofRM(getUChar(delta-1))==5 ? "l" : "m");
         goto decode_success;
      }
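      /* All three fences are modelled identically, as one full Imbe_Fence
         barrier, rather than as distinct load-only/store-only fences.  The
         DIP above can look at getUChar(delta-1) to pick the "l"/"m" prefix
         because delta has already been advanced past the modrm byte that
         encodes /5 versus /6. */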
      /* 0F AE /7 = CLFLUSH -- flush cache line */
      if (haveNo66noF2noF3(pfx)
          && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
          && sz == 4) {

         /* This is something of a hack.  We need to know the size of
            the cache line containing addr.  Since we don't (easily),
            assume 256 on the basis that no real cache would have a
            line that big.  It's safe to invalidate more stuff than we
            need, just inefficient. */
         ULong lineszB = 256ULL;

         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;

         /* Round addr down to the start of the containing block. */
         stmt( IRStmt_Put(
                  OFFB_CMSTART,
                  binop( Iop_And64,
                         mkexpr(addr),
                         mkU64( ~(lineszB-1) ))) );

         stmt( IRStmt_Put(OFFB_CMLEN, mkU64(lineszB) ) );

         jmp_lit(dres, Ijk_InvalICache, (Addr64)(guest_RIP_bbstart+delta));

         DIP("clflush %s\n", dis_buf);
         goto decode_success;
      }
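      /* The CMSTART/CMLEN writes plus the Ijk_InvalICache exit ask the
         framework to discard any cached translations overlapping the
         flushed range, which is why CLFLUSH ends the block here rather
         than simply being treated as a no-op. */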
      /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
      if (haveNo66noF2noF3(pfx)
          && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
          && sz == 4) {
         delta = dis_STMXCSR(vbi, pfx, delta, False/*!isAvx*/);
         goto decode_success;
      }
      /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
      if (haveNo66noF2noF3(pfx)
          && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
          && sz == 4) {
         delta = dis_LDMXCSR(vbi, pfx, delta, False/*!isAvx*/);
         goto decode_success;
      }
      /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
      if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
          && !epartIsReg(getUChar(delta))
          && gregOfRexRM(pfx,getUChar(delta)) == 0) {
         delta = dis_FXSAVE(vbi, pfx, delta, sz);
         goto decode_success;
      }
      /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
      if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
          && !epartIsReg(getUChar(delta))
          && gregOfRexRM(pfx,getUChar(delta)) == 1) {
         delta = dis_FXRSTOR(vbi, pfx, delta, sz);
         goto decode_success;
      }
      /* 0F AE /4 = XSAVE mem -- write x87, SSE, AVX state to memory */
      if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
          && !epartIsReg(getUChar(delta))
          && gregOfRexRM(pfx,getUChar(delta)) == 4
          && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
         delta = dis_XSAVE(vbi, pfx, delta, sz);
         goto decode_success;
      }
      /* 0F AE /5 = XRSTOR mem -- read x87, SSE, AVX state from memory */
      if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
          && !epartIsReg(getUChar(delta))
          && gregOfRexRM(pfx,getUChar(delta)) == 5
          && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
         delta = dis_XRSTOR(vbi, pfx, delta, sz);
         goto decode_success;
      }
      /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         Long delta0 = delta;
         delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpps", True, 4 );
         if (delta > delta0) goto decode_success;
      }
      /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
      if (haveF3no66noF2(pfx) && sz == 4) {
         Long delta0 = delta;
         delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpss", False, 4 );
         if (delta > delta0) goto decode_success;
      }
      /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
      if (haveF2no66noF3(pfx) && sz == 4) {
         Long delta0 = delta;
         delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpsd", False, 8 );
         if (delta > delta0) goto decode_success;
      }
      /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
      if (have66noF2noF3(pfx) && sz == 2) {
         Long delta0 = delta;
         delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmppd", True, 8 );
         if (delta > delta0) goto decode_success;
      }
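      /* The delta0 comparisons above rely on a convention of
         dis_SSE_cmp_E_to_G: if the 8-bit immediate selects a comparison
         predicate the helper does not implement, it is expected to return
         delta unchanged, so "delta > delta0" distinguishes a successful
         decode from one that should fall through to decode_failure. */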
      /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
      if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
         modrm = getUChar(delta);
         if (!epartIsReg(modrm)) {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) );
            DIP("movnti %s,%s\n", dis_buf,
                                  nameIRegG(sz, pfx, modrm));
            delta += alen;
            goto decode_success;
         }
         /* else fall through */
      }
      /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
      /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
         put it into the specified lane of mmx(G). */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
            mmx reg.  t4 is the new lane value.  t5 is the original
            mmx value. t6 is the new mmx value. */
         Int lane;
         t4 = newTemp(Ity_I16);
         t5 = newTemp(Ity_I64);
         t6 = newTemp(Ity_I64);
         modrm = getUChar(delta);
         do_MMX_preamble();

         assign(t5, getMMXReg(gregLO3ofRM(modrm)));
         breakup64to16s( t5, &t3, &t2, &t1, &t0 );

         if (epartIsReg(modrm)) {
            assign(t4, getIReg16(eregOfRexRM(pfx,modrm)));
            delta += 1+1;
            lane = getUChar(delta-1);
            DIP("pinsrw $%d,%s,%s\n", lane,
                                      nameIReg16(eregOfRexRM(pfx,modrm)),
                                      nameMMXReg(gregLO3ofRM(modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
            delta += 1+alen;
            lane = getUChar(delta-1);
            assign(t4, loadLE(Ity_I16, mkexpr(addr)));
            DIP("pinsrw $%d,%s,%s\n", lane,
                                      dis_buf,
                                      nameMMXReg(gregLO3ofRM(modrm)));
         }

         switch (lane & 3) {
            case 0:  assign(t6, mk64from16s(t3,t2,t1,t4)); break;
            case 1:  assign(t6, mk64from16s(t3,t2,t4,t0)); break;
            case 2:  assign(t6, mk64from16s(t3,t4,t1,t0)); break;
            case 3:  assign(t6, mk64from16s(t4,t2,t1,t0)); break;
            default: vassert(0);
         }
         putMMXReg(gregLO3ofRM(modrm), mkexpr(t6));
         goto decode_success;
      }

      /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
         put it into the specified lane of xmm(G). */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         Int lane;
         t4 = newTemp(Ity_I16);
         modrm = getUChar(delta);
         UInt rG = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign(t4, getIReg16(rE));
            delta += 1+1;
            lane = getUChar(delta-1);
            DIP("pinsrw $%d,%s,%s\n",
                lane, nameIReg16(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
                              1/*byte after the amode*/ );
            delta += 1+alen;
            lane = getUChar(delta-1);
            assign(t4, loadLE(Ity_I16, mkexpr(addr)));
            DIP("pinsrw $%d,%s,%s\n",
                lane, dis_buf, nameXMMReg(rG));
         }
         IRTemp src_vec = newTemp(Ity_V128);
         assign(src_vec, getXMMReg(rG));
         IRTemp res_vec = math_PINSRW_128( src_vec, t4, lane & 7);
         putXMMReg(rG, mkexpr(res_vec));
         goto decode_success;
      }
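      /* The lane immediate is masked with 7 in the XMM form just above
         (eight 16-bit lanes in 128 bits) but with 3 in the MMX form before
         it (four lanes in 64 bits); any higher bits of the immediate are
         simply ignored. */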
      /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
      /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
         zero-extend of it in ireg(G). */
      if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            IRTemp sV = newTemp(Ity_I64);
            t5 = newTemp(Ity_I16);
            do_MMX_preamble();
            assign(sV, getMMXReg(eregLO3ofRM(modrm)));
            breakup64to16s( sV, &t3, &t2, &t1, &t0 );
            switch (getUChar(delta+1) & 3) {
               case 0:  assign(t5, mkexpr(t0)); break;
               case 1:  assign(t5, mkexpr(t1)); break;
               case 2:  assign(t5, mkexpr(t2)); break;
               case 3:  assign(t5, mkexpr(t3)); break;
               default: vassert(0);
            }
            if (sz == 8)
               putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5)));
            else
               putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5)));
            DIP("pextrw $%d,%s,%s\n",
                (Int)getUChar(delta+1),
                nameMMXReg(eregLO3ofRM(modrm)),
                sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm))
                      : nameIReg32(gregOfRexRM(pfx,modrm))
            );
            delta += 2;
            goto decode_success;
         }
         /* else fall through */
         /* note, for anyone filling in the mem case: this insn has one
            byte after the amode and therefore you must pass 1 as the
            last arg to disAMode */
      }

      /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
         zero-extend of it in ireg(G). */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         Long delta0 = delta;
         delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
                                              False/*!isAvx*/ );
         if (delta > delta0) goto decode_success;
         /* else fall through -- decoding has failed */
      }
      /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         Int    imm8 = 0;
         IRTemp sV   = newTemp(Ity_V128);
         IRTemp dV   = newTemp(Ity_V128);
         modrm = getUChar(delta);
         UInt rG = gregOfRexRM(pfx,modrm);
         assign( dV, getXMMReg(rG) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            imm8 = (Int)getUChar(delta+1);
            delta += 1+1;
            DIP("shufps $%d,%s,%s\n", imm8, nameXMMReg(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            imm8 = (Int)getUChar(delta+alen);
            delta += 1+alen;
            DIP("shufps $%d,%s,%s\n", imm8, dis_buf, nameXMMReg(rG));
         }
         IRTemp res = math_SHUFPS_128( sV, dV, imm8 );
         putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
         goto decode_success;
      }
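      /* For reference, SHUFPS's immediate works two bits per result lane:
         result lanes 0 and 1 are selected from the destination (dV) by
         imm8[1:0] and imm8[3:2], and lanes 2 and 3 from the source (sV) by
         imm8[5:4] and imm8[7:6]; math_SHUFPS_128 is assumed to implement
         exactly that selection. */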
      /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
      if (have66noF2noF3(pfx) && sz == 2) {
         Int    select;
         IRTemp sV = newTemp(Ity_V128);
         IRTemp dV = newTemp(Ity_V128);

         modrm = getUChar(delta);
         assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );

         if (epartIsReg(modrm)) {
            assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
            select = (Int)getUChar(delta+1);
            delta += 1+1;
            DIP("shufpd $%d,%s,%s\n", select,
                                      nameXMMReg(eregOfRexRM(pfx,modrm)),
                                      nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            select = getUChar(delta+alen);
            delta += 1+alen;
            DIP("shufpd $%d,%s,%s\n", select,
                                      dis_buf,
                                      nameXMMReg(gregOfRexRM(pfx,modrm)));
         }

         IRTemp res = math_SHUFPD_128( sV, dV, select );
         putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
         goto decode_success;
      }
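      /* For reference: only the low two bits of the immediate matter for
         SHUFPD: bit 0 selects which half of dV becomes result lane 0, and
         bit 1 selects which half of sV becomes result lane 1. */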
      /* 66 0F D1 = PSRLW by E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlw", Iop_ShrN16x8 );
         goto decode_success;
      }

      /* 66 0F D2 = PSRLD by E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrld", Iop_ShrN32x4 );
         goto decode_success;
      }

      /* 66 0F D3 = PSRLQ by E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlq", Iop_ShrN64x2 );
         goto decode_success;
      }

      /* 66 0F D4 = PADDQ */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "paddq", Iop_Add64x2, False );
         goto decode_success;
      }

      /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
      /* 0F D4 = PADDQ -- add 64x1 */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         do_MMX_preamble();
         delta = dis_MMXop_regmem_to_reg (
                    vbi, pfx, delta, opc, "paddq", False );
         goto decode_success;
      }

      /* 66 0F D5 = PMULLW -- 16x8 multiply */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pmullw", Iop_Mul16x8, False );
         goto decode_success;
      }
      /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
         hi half). */
      if (haveF3no66noF2(pfx) && sz == 4) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            do_MMX_preamble();
            putXMMReg( gregOfRexRM(pfx,modrm),
                       unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) );
            DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                   nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += 1;
            goto decode_success;
         }
         /* apparently no mem case for this insn */
      }

      /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
         or lo half xmm). */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            /* fall through, awaiting test case */
            /* dst: lo half copied, hi half zeroed */
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr),
                     getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
            DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf );
            delta += alen;
            goto decode_success;
         }
      }

      /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
      if (haveF2no66noF3(pfx) && sz == 4) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            do_MMX_preamble();
            putMMXReg( gregLO3ofRM(modrm),
                       getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
            DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                   nameMMXReg(gregLO3ofRM(modrm)));
            delta += 1;
            goto decode_success;
         }
         /* apparently no mem case for this insn */
      }
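      /* For reference: Iop_64UtoV128 places the 64-bit MMX value in the
         low half of the XMM destination and zeroes the high half, which
         is exactly the MOVQ2DQ behaviour described above. */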
      /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16
         lanes in xmm(E), turn them into a byte, and put
         zero-extend of it in ireg(G).  Doing this directly is just
         too cumbersome; give up therefore and call a helper. */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
          && epartIsReg(getUChar(delta))) { /* no memory case, it seems */
         delta = dis_PMOVMSKB_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }

      /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
      /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
         mmx(E), turn them into a byte, and put zero-extend of it in
         ireg(G). */
      if (haveNo66noF2noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            do_MMX_preamble();
            t0 = newTemp(Ity_I64);
            t1 = newTemp(Ity_I32);
            assign(t0, getMMXReg(eregLO3ofRM(modrm)));
            assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
            putIReg32(gregOfRexRM(pfx,modrm), mkexpr(t1));
            DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                    nameIReg32(gregOfRexRM(pfx,modrm)));
            delta += 1;
            goto decode_success;
         }
         /* else fall through */
      }
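      /* For reference: Iop_GetMSBs8x8 collects the most significant bit
         of each of the 8 bytes into a single 8-bit value, so the
         zero-extended result matches the PMOVMSKB bit layout. */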
      /* 66 0F D8 = PSUBUSB */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "psubusb", Iop_QSub8Ux16, False );
         goto decode_success;
      }

      /* 66 0F D9 = PSUBUSW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "psubusw", Iop_QSub16Ux8, False );
         goto decode_success;
      }

      /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
      /* 0F DA = PMINUB -- 8x8 unsigned min */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         do_MMX_preamble();
         delta = dis_MMXop_regmem_to_reg (
                    vbi, pfx, delta, opc, "pminub", False );
         goto decode_success;
      }

      /* 66 0F DA = PMINUB -- 8x16 unsigned min */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pminub", Iop_Min8Ux16, False );
         goto decode_success;
      }

      /* 66 0F DB = PAND */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pand", Iop_AndV128 );
         goto decode_success;
      }

      /* 66 0F DC = PADDUSB */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "paddusb", Iop_QAdd8Ux16, False );
         goto decode_success;
      }

      /* 66 0F DD = PADDUSW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "paddusw", Iop_QAdd16Ux8, False );
         goto decode_success;
      }

      /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
      /* 0F DE = PMAXUB -- 8x8 unsigned max */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         do_MMX_preamble();
         delta = dis_MMXop_regmem_to_reg (
                    vbi, pfx, delta, opc, "pmaxub", False );
         goto decode_success;
      }

      /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pmaxub", Iop_Max8Ux16, False );
         goto decode_success;
      }

      /* 66 0F DF = PANDN */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "pandn", Iop_AndV128 );
         goto decode_success;
      }

      /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
      /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         do_MMX_preamble();
         delta = dis_MMXop_regmem_to_reg (
                    vbi, pfx, delta, opc, "pavgb", False );
         goto decode_success;
      }

      /* 66 0F E0 = PAVGB */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pavgb", Iop_Avg8Ux16, False );
         goto decode_success;
      }

      /* 66 0F E1 = PSRAW by E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psraw", Iop_SarN16x8 );
         goto decode_success;
      }

      /* 66 0F E2 = PSRAD by E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrad", Iop_SarN32x4 );
         goto decode_success;
      }

      /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
      /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         do_MMX_preamble();
         delta = dis_MMXop_regmem_to_reg (
                    vbi, pfx, delta, opc, "pavgw", False );
         goto decode_success;
      }

      /* 66 0F E3 = PAVGW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pavgw", Iop_Avg16Ux8, False );
         goto decode_success;
      }

      /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
      /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         do_MMX_preamble();
         delta = dis_MMXop_regmem_to_reg (
                    vbi, pfx, delta, opc, "pmuluh", False );
         goto decode_success;
      }

      /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pmulhuw", Iop_MulHi16Ux8, False );
         goto decode_success;
      }

      /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pmulhw", Iop_MulHi16Sx8, False );
         goto decode_success;
      }
      /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
         lo half xmm(G), and zero upper half, rounding towards zero */
      /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
         lo half xmm(G), according to prevailing rounding mode, and zero
         upper half */
      if ( (haveF2no66noF3(pfx) && sz == 4)
           || (have66noF2noF3(pfx) && sz == 2) ) {
         delta = dis_CVTxPD2DQ_128( vbi, pfx, delta, False/*!isAvx*/,
                                    toBool(sz == 2)/*r2zero*/);
         goto decode_success;
      }

      /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
         F64 in xmm(G) */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_CVTDQ2PD_128(vbi, pfx, delta, False/*!isAvx*/);
         goto decode_success;
      }
      /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
      /* 0F E7 = MOVNTQ -- for us, just a plain MMX store.  Note, the
         Intel manual does not say anything about the usual business of
         the FP reg tags getting trashed whenever an MMX insn happens.
         So we just leave them alone. */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         modrm = getUChar(delta);
         if (!epartIsReg(modrm)) {
            /* do_MMX_preamble(); Intel docs don't specify this */
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
            DIP("movntq %s,%s\n", dis_buf,
                                  nameMMXReg(gregLO3ofRM(modrm)));
            delta += alen;
            goto decode_success;
         }
         /* else fall through */
      }

      /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
      if (have66noF2noF3(pfx) && sz == 2) {
         modrm = getUChar(delta);
         if (!epartIsReg(modrm)) {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SIGNAL_if_not_16_aligned( vbi, addr );
            storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
            DIP("movntdq %s,%s\n", dis_buf,
                                   nameXMMReg(gregOfRexRM(pfx,modrm)));
            delta += alen;
            goto decode_success;
         }
         /* else fall through */
      }
      /* 66 0F E8 = PSUBSB */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "psubsb", Iop_QSub8Sx16, False );
         goto decode_success;
      }

      /* 66 0F E9 = PSUBSW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "psubsw", Iop_QSub16Sx8, False );
         goto decode_success;
      }

      /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
      /* 0F EA = PMINSW -- 16x4 signed min */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         do_MMX_preamble();
         delta = dis_MMXop_regmem_to_reg (
                    vbi, pfx, delta, opc, "pminsw", False );
         goto decode_success;
      }

      /* 66 0F EA = PMINSW -- 16x8 signed min */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pminsw", Iop_Min16Sx8, False );
         goto decode_success;
      }

      /* 66 0F EB = POR */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "por", Iop_OrV128 );
         goto decode_success;
      }

      /* 66 0F EC = PADDSB */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "paddsb", Iop_QAdd8Sx16, False );
         goto decode_success;
      }

      /* 66 0F ED = PADDSW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "paddsw", Iop_QAdd16Sx8, False );
         goto decode_success;
      }

      /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
      /* 0F EE = PMAXSW -- 16x4 signed max */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         do_MMX_preamble();
         delta = dis_MMXop_regmem_to_reg (
                    vbi, pfx, delta, opc, "pmaxsw", False );
         goto decode_success;
      }

      /* 66 0F EE = PMAXSW -- 16x8 signed max */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "pmaxsw", Iop_Max16Sx8, False );
         goto decode_success;
      }

      /* 66 0F EF = PXOR */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pxor", Iop_XorV128 );
         goto decode_success;
      }

      /* 66 0F F1 = PSLLW by E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllw", Iop_ShlN16x8 );
         goto decode_success;
      }

      /* 66 0F F2 = PSLLD by E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "pslld", Iop_ShlN32x4 );
         goto decode_success;
      }

      /* 66 0F F3 = PSLLQ by E */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllq", Iop_ShlN64x2 );
         goto decode_success;
      }
15067 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
15068 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
15070 if (have66noF2noF3(pfx
) && sz
== 2) {
15071 IRTemp sV
= newTemp(Ity_V128
);
15072 IRTemp dV
= newTemp(Ity_V128
);
15073 modrm
= getUChar(delta
);
15074 UInt rG
= gregOfRexRM(pfx
,modrm
);
15075 assign( dV
, getXMMReg(rG
) );
15076 if (epartIsReg(modrm
)) {
15077 UInt rE
= eregOfRexRM(pfx
,modrm
);
15078 assign( sV
, getXMMReg(rE
) );
15080 DIP("pmuludq %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
15082 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15083 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15085 DIP("pmuludq %s,%s\n", dis_buf
, nameXMMReg(rG
));
15087 putXMMReg( rG
, mkexpr(math_PMULUDQ_128( sV
, dV
)) );
15088 goto decode_success
;
15090 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
15091 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
15092 0 to form 64-bit result */
15093 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
15094 IRTemp sV
= newTemp(Ity_I64
);
15095 IRTemp dV
= newTemp(Ity_I64
);
15096 t1
= newTemp(Ity_I32
);
15097 t0
= newTemp(Ity_I32
);
15098 modrm
= getUChar(delta
);
15101 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
15103 if (epartIsReg(modrm
)) {
15104 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
15106 DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
15107 nameMMXReg(gregLO3ofRM(modrm
)));
15109 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15110 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
15112 DIP("pmuludq %s,%s\n", dis_buf
,
15113 nameMMXReg(gregLO3ofRM(modrm
)));
15116 assign( t0
, unop(Iop_64to32
, mkexpr(dV
)) );
15117 assign( t1
, unop(Iop_64to32
, mkexpr(sV
)) );
15118 putMMXReg( gregLO3ofRM(modrm
),
15119 binop( Iop_MullU32
, mkexpr(t0
), mkexpr(t1
) ) );
15120 goto decode_success
;
15125 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
15126 E(xmm or mem) to G(xmm) */
15127 if (have66noF2noF3(pfx
) && sz
== 2) {
15128 IRTemp sV
= newTemp(Ity_V128
);
15129 IRTemp dV
= newTemp(Ity_V128
);
15130 modrm
= getUChar(delta
);
15131 UInt rG
= gregOfRexRM(pfx
,modrm
);
15132 if (epartIsReg(modrm
)) {
15133 UInt rE
= eregOfRexRM(pfx
,modrm
);
15134 assign( sV
, getXMMReg(rE
) );
15136 DIP("pmaddwd %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
15138 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15139 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15141 DIP("pmaddwd %s,%s\n", dis_buf
, nameXMMReg(rG
));
15143 assign( dV
, getXMMReg(rG
) );
15144 putXMMReg( rG
, mkexpr(math_PMADDWD_128(dV
, sV
)) );
15145 goto decode_success
;
15150 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
15151 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
15152 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
15154 delta
= dis_MMXop_regmem_to_reg (
15155 vbi
, pfx
, delta
, opc
, "psadbw", False
);
15156 goto decode_success
;
15158 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
15159 from E(xmm or mem) to G(xmm) */
15160 if (have66noF2noF3(pfx
) && sz
== 2) {
15161 IRTemp sV
= newTemp(Ity_V128
);
15162 IRTemp dV
= newTemp(Ity_V128
);
15163 modrm
= getUChar(delta
);
15164 UInt rG
= gregOfRexRM(pfx
,modrm
);
15165 if (epartIsReg(modrm
)) {
15166 UInt rE
= eregOfRexRM(pfx
,modrm
);
15167 assign( sV
, getXMMReg(rE
) );
15169 DIP("psadbw %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
15171 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15172 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15174 DIP("psadbw %s,%s\n", dis_buf
, nameXMMReg(rG
));
15176 assign( dV
, getXMMReg(rG
) );
15177 putXMMReg( rG
, mkexpr( math_PSADBW_128 ( dV
, sV
) ) );
15179 goto decode_success
;
      /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
      /* 0F F7 = MASKMOVQ -- 8x8 masked store */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         Bool ok = False;
         delta = dis_MMX( &ok, vbi, pfx, sz, delta-1 );
         if (ok) goto decode_success;
      }

      /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
      if (have66noF2noF3(pfx) && sz == 2 && epartIsReg(getUChar(delta))) {
         delta = dis_MASKMOVDQU( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      /* 66 0F F8 = PSUBB */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "psubb", Iop_Sub8x16, False );
         goto decode_success;
      }

      /* 66 0F F9 = PSUBW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "psubw", Iop_Sub16x8, False );
         goto decode_success;
      }

      /* 66 0F FA = PSUBD */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "psubd", Iop_Sub32x4, False );
         goto decode_success;
      }

      /* 66 0F FB = PSUBQ */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "psubq", Iop_Sub64x2, False );
         goto decode_success;
      }

      /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
      /* 0F FB = PSUBQ -- sub 64x1 */
      if (haveNo66noF2noF3(pfx) && sz == 4) {
         do_MMX_preamble();
         delta = dis_MMXop_regmem_to_reg (
                    vbi, pfx, delta, opc, "psubq", False );
         goto decode_success;
      }

      /* 66 0F FC = PADDB */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "paddb", Iop_Add8x16, False );
         goto decode_success;
      }

      /* 66 0F FD = PADDW */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "paddw", Iop_Add16x8, False );
         goto decode_success;
      }

      /* 66 0F FE = PADDD */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                    "paddd", Iop_Add32x4, False );
         goto decode_success;
      }

   default:
      goto decode_failure;

   }

  decode_failure:
   *decode_OK = False;
   return deltaIN;

  decode_success:
   *decode_OK = True;
   return delta;
}
/*------------------------------------------------------------*/
/*--- Top-level SSE3 (not SupSSE3): dis_ESC_0F__SSE3       ---*/
/*------------------------------------------------------------*/
static Long dis_MOVDDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool isAvx )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp sV    = newTemp(Ity_V128);
   IRTemp d0    = newTemp(Ity_I64);
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      DIP("%smovddup %s,%s\n",
          isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
      delta += 1;
      assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
      DIP("%smovddup %s,%s\n",
          isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
      delta += alen;
   }
   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
   return delta;
}
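/* Note: in both the register and memory forms above, the selected 64 bits
   end up duplicated into both halves of the destination, which is the
   defining behaviour of MOVDDUP. */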
15320 static Long
dis_MOVDDUP_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
15323 IRTemp addr
= IRTemp_INVALID
;
15326 IRTemp d0
= newTemp(Ity_I64
);
15327 IRTemp d1
= newTemp(Ity_I64
);
15328 UChar modrm
= getUChar(delta
);
15329 UInt rG
= gregOfRexRM(pfx
,modrm
);
15330 if (epartIsReg(modrm
)) {
15331 UInt rE
= eregOfRexRM(pfx
,modrm
);
15332 DIP("vmovddup %s,%s\n", nameYMMReg(rE
), nameYMMReg(rG
));
15334 assign ( d0
, getYMMRegLane64(rE
, 0) );
15335 assign ( d1
, getYMMRegLane64(rE
, 2) );
15337 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15338 assign( d0
, loadLE(Ity_I64
, mkexpr(addr
)) );
15339 assign( d1
, loadLE(Ity_I64
, binop(Iop_Add64
,
15340 mkexpr(addr
), mkU64(16))) );
15341 DIP("vmovddup %s,%s\n", dis_buf
, nameYMMReg(rG
));
15344 putYMMRegLane64( rG
, 0, mkexpr(d0
) );
15345 putYMMRegLane64( rG
, 1, mkexpr(d0
) );
15346 putYMMRegLane64( rG
, 2, mkexpr(d1
) );
15347 putYMMRegLane64( rG
, 3, mkexpr(d1
) );
15352 static Long
dis_MOVSxDUP_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
15353 Long delta
, Bool isAvx
, Bool isL
)
15355 IRTemp addr
= IRTemp_INVALID
;
15358 IRTemp sV
= newTemp(Ity_V128
);
15359 UChar modrm
= getUChar(delta
);
15360 UInt rG
= gregOfRexRM(pfx
,modrm
);
15361 IRTemp s3
, s2
, s1
, s0
;
15362 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
15363 if (epartIsReg(modrm
)) {
15364 UInt rE
= eregOfRexRM(pfx
,modrm
);
15365 assign( sV
, getXMMReg(rE
) );
15366 DIP("%smovs%cdup %s,%s\n",
15367 isAvx
? "v" : "", isL
? 'l' : 'h', nameXMMReg(rE
), nameXMMReg(rG
));
15370 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15372 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
15373 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15374 DIP("%smovs%cdup %s,%s\n",
15375 isAvx
? "v" : "", isL
? 'l' : 'h', dis_buf
, nameXMMReg(rG
));
15378 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
15379 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
15380 ( rG
, isL
? mkV128from32s( s2
, s2
, s0
, s0
)
15381 : mkV128from32s( s3
, s3
, s1
, s1
) );
15386 static Long
dis_MOVSxDUP_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
15387 Long delta
, Bool isL
)
15389 IRTemp addr
= IRTemp_INVALID
;
15392 IRTemp sV
= newTemp(Ity_V256
);
15393 UChar modrm
= getUChar(delta
);
15394 UInt rG
= gregOfRexRM(pfx
,modrm
);
15395 IRTemp s7
, s6
, s5
, s4
, s3
, s2
, s1
, s0
;
15396 s7
= s6
= s5
= s4
= s3
= s2
= s1
= s0
= IRTemp_INVALID
;
15397 if (epartIsReg(modrm
)) {
15398 UInt rE
= eregOfRexRM(pfx
,modrm
);
15399 assign( sV
, getYMMReg(rE
) );
15400 DIP("vmovs%cdup %s,%s\n",
15401 isL
? 'l' : 'h', nameYMMReg(rE
), nameYMMReg(rG
));
15404 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15405 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
15406 DIP("vmovs%cdup %s,%s\n",
15407 isL
? 'l' : 'h', dis_buf
, nameYMMReg(rG
));
15410 breakupV256to32s( sV
, &s7
, &s6
, &s5
, &s4
, &s3
, &s2
, &s1
, &s0
);
15411 putYMMRegLane128( rG
, 1, isL
? mkV128from32s( s6
, s6
, s4
, s4
)
15412 : mkV128from32s( s7
, s7
, s5
, s5
) );
15413 putYMMRegLane128( rG
, 0, isL
? mkV128from32s( s2
, s2
, s0
, s0
)
15414 : mkV128from32s( s3
, s3
, s1
, s1
) );
static IRTemp math_HADDPS_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   IRTemp leftV  = newTemp(Ity_V128);
   IRTemp rightV = newTemp(Ity_V128);
   IRTemp rm     = newTemp(Ity_I32);
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;

   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );

   assign( leftV,  mkV128from32s( s2, s0, d2, d0 ) );
   assign( rightV, mkV128from32s( s3, s1, d3, d1 ) );

   IRTemp res = newTemp(Ity_V128);
   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( res, triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
                      mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
   return res;
}
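/* Worked example of the lane arithmetic above: leftV holds (hi to lo)
   s2,s0,d2,d0 and rightV holds s3,s1,d3,d1, so the result lanes, lowest
   first, are d0 op d1, d2 op d3, s0 op s1, s2 op s3 -- the HADDPS/HSUBPS
   definition. */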
static IRTemp math_HADDPD_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
{
   IRTemp s1, s0, d1, d0;
   IRTemp leftV  = newTemp(Ity_V128);
   IRTemp rightV = newTemp(Ity_V128);
   IRTemp rm     = newTemp(Ity_I32);
   s1 = s0 = d1 = d0 = IRTemp_INVALID;

   breakupV128to64s( sV, &s1, &s0 );
   breakupV128to64s( dV, &d1, &d0 );

   assign( leftV,  binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
   assign( rightV, binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );

   IRTemp res = newTemp(Ity_V128);
   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( res, triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
                      mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
   return res;
}
__attribute__((noinline))
static
Long dis_ESC_0F__SSE3 ( Bool* decode_OK,
                        const VexAbiInfo* vbi,
                        Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   *decode_OK = False;

   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;

      /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
         duplicating some lanes (2:2:0:0). */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
                                   True/*isL*/ );
         goto decode_success;
      }
      /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
         duplicating some lanes (0:1:0:1). */
      if (haveF2no66noF3(pfx)
          && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_MOVDDUP_128( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }

      /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
         duplicating some lanes (3:3:1:1). */
      if (haveF3no66noF2(pfx) && sz == 4) {
         delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
                                   False/*!isL*/ );
         goto decode_success;
      }
15510 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
15511 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
15512 if (haveF2no66noF3(pfx
) && sz
== 4) {
15513 IRTemp eV
= newTemp(Ity_V128
);
15514 IRTemp gV
= newTemp(Ity_V128
);
15515 Bool isAdd
= opc
== 0x7C;
15516 const HChar
* str
= isAdd
? "add" : "sub";
15517 modrm
= getUChar(delta
);
15518 UInt rG
= gregOfRexRM(pfx
,modrm
);
15519 if (epartIsReg(modrm
)) {
15520 UInt rE
= eregOfRexRM(pfx
,modrm
);
15521 assign( eV
, getXMMReg(rE
) );
15522 DIP("h%sps %s,%s\n", str
, nameXMMReg(rE
), nameXMMReg(rG
));
15525 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15526 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15527 DIP("h%sps %s,%s\n", str
, dis_buf
, nameXMMReg(rG
));
15531 assign( gV
, getXMMReg(rG
) );
15532 putXMMReg( rG
, mkexpr( math_HADDPS_128 ( gV
, eV
, isAdd
) ) );
15533 goto decode_success
;
15535 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
15536 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
15537 if (have66noF2noF3(pfx
) && sz
== 2) {
15538 IRTemp eV
= newTemp(Ity_V128
);
15539 IRTemp gV
= newTemp(Ity_V128
);
15540 Bool isAdd
= opc
== 0x7C;
15541 const HChar
* str
= isAdd
? "add" : "sub";
15542 modrm
= getUChar(delta
);
15543 UInt rG
= gregOfRexRM(pfx
,modrm
);
15544 if (epartIsReg(modrm
)) {
15545 UInt rE
= eregOfRexRM(pfx
,modrm
);
15546 assign( eV
, getXMMReg(rE
) );
15547 DIP("h%spd %s,%s\n", str
, nameXMMReg(rE
), nameXMMReg(rG
));
15550 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15551 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15552 DIP("h%spd %s,%s\n", str
, dis_buf
, nameXMMReg(rG
));
15556 assign( gV
, getXMMReg(rG
) );
15557 putXMMReg( rG
, mkexpr( math_HADDPD_128 ( gV
, eV
, isAdd
) ) );
15558 goto decode_success
;
15563 /* 66 0F D0 = ADDSUBPD -- 64x4 +/- from E (mem or xmm) to G (xmm). */
15564 if (have66noF2noF3(pfx
) && sz
== 2) {
15565 IRTemp eV
= newTemp(Ity_V128
);
15566 IRTemp gV
= newTemp(Ity_V128
);
15567 modrm
= getUChar(delta
);
15568 UInt rG
= gregOfRexRM(pfx
,modrm
);
15569 if (epartIsReg(modrm
)) {
15570 UInt rE
= eregOfRexRM(pfx
,modrm
);
15571 assign( eV
, getXMMReg(rE
) );
15572 DIP("addsubpd %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
15575 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15576 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15577 DIP("addsubpd %s,%s\n", dis_buf
, nameXMMReg(rG
));
15581 assign( gV
, getXMMReg(rG
) );
15582 putXMMReg( rG
, mkexpr( math_ADDSUBPD_128 ( gV
, eV
) ) );
15583 goto decode_success
;
15585 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
15586 if (haveF2no66noF3(pfx
) && sz
== 4) {
15587 IRTemp eV
= newTemp(Ity_V128
);
15588 IRTemp gV
= newTemp(Ity_V128
);
15589 modrm
= getUChar(delta
);
15590 UInt rG
= gregOfRexRM(pfx
,modrm
);
15592 modrm
= getUChar(delta
);
15593 if (epartIsReg(modrm
)) {
15594 UInt rE
= eregOfRexRM(pfx
,modrm
);
15595 assign( eV
, getXMMReg(rE
) );
15596 DIP("addsubps %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
15599 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15600 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15601 DIP("addsubps %s,%s\n", dis_buf
, nameXMMReg(rG
));
15605 assign( gV
, getXMMReg(rG
) );
15606 putXMMReg( rG
, mkexpr( math_ADDSUBPS_128 ( gV
, eV
) ) );
15607 goto decode_success
;
15612 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
15613 if (haveF2no66noF3(pfx
) && sz
== 4) {
15614 modrm
= getUChar(delta
);
15615 if (epartIsReg(modrm
)) {
15616 goto decode_failure
;
15618 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15619 putXMMReg( gregOfRexRM(pfx
,modrm
),
15620 loadLE(Ity_V128
, mkexpr(addr
)) );
15621 DIP("lddqu %s,%s\n", dis_buf
,
15622 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
15625 goto decode_success
;
15630 goto decode_failure
;
15635 *decode_OK
= False
;
/*------------------------------------------------------------*/
/*--- Top-level SSSE3: dis_ESC_0F38__SupSSE3               ---*/
/*------------------------------------------------------------*/

static
IRTemp math_PSHUFB_XMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
{
   IRTemp halfMask = newTemp(Ity_I64);
   assign(halfMask, mkU64(0x8F8F8F8F8F8F8F8FULL));
   IRExpr* mask = binop(Iop_64HLtoV128, mkexpr(halfMask), mkexpr(halfMask));
   IRTemp res = newTemp(Ity_V128);
   assign(res,
          binop(Iop_PermOrZero8x16,
                mkexpr(dV),
                // Mask off bits [6:3] of each source operand lane
                binop(Iop_AndV128, mkexpr(sV), mask)
          )
         );
   return res;
}

static
IRTemp math_PSHUFB_YMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
{
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo );
   breakupV256toV128s( sV, &sHi, &sLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PSHUFB_XMM(dHi, sHi)),
                     mkexpr(math_PSHUFB_XMM(dLo, sLo))));
   return res;
}
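/* Worked example for math_PSHUFB_XMM: a control byte with its top bit set
   produces a zero output byte; otherwise the low four bits of the control
   byte select which byte of dV is copied.  The 0x8F..8F mask keeps exactly
   those bits before handing the control vector to Iop_PermOrZero8x16. */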
15682 static Long
dis_PHADD_128 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
15683 Bool isAvx
, UChar opc
)
15685 IRTemp addr
= IRTemp_INVALID
;
15688 const HChar
* str
= "???";
15689 IROp opV64
= Iop_INVALID
;
15690 IROp opCatO
= Iop_CatOddLanes16x4
;
15691 IROp opCatE
= Iop_CatEvenLanes16x4
;
15692 IRTemp sV
= newTemp(Ity_V128
);
15693 IRTemp dV
= newTemp(Ity_V128
);
15694 IRTemp sHi
= newTemp(Ity_I64
);
15695 IRTemp sLo
= newTemp(Ity_I64
);
15696 IRTemp dHi
= newTemp(Ity_I64
);
15697 IRTemp dLo
= newTemp(Ity_I64
);
15698 UChar modrm
= getUChar(delta
);
15699 UInt rG
= gregOfRexRM(pfx
,modrm
);
15700 UInt rV
= isAvx
? getVexNvvvv(pfx
) : rG
;
15703 case 0x01: opV64
= Iop_Add16x4
; str
= "addw"; break;
15704 case 0x02: opV64
= Iop_Add32x2
; str
= "addd"; break;
15705 case 0x03: opV64
= Iop_QAdd16Sx4
; str
= "addsw"; break;
15706 case 0x05: opV64
= Iop_Sub16x4
; str
= "subw"; break;
15707 case 0x06: opV64
= Iop_Sub32x2
; str
= "subd"; break;
15708 case 0x07: opV64
= Iop_QSub16Sx4
; str
= "subsw"; break;
15709 default: vassert(0);
15711 if (opc
== 0x02 || opc
== 0x06) {
15712 opCatO
= Iop_InterleaveHI32x2
;
15713 opCatE
= Iop_InterleaveLO32x2
;
15716 assign( dV
, getXMMReg(rV
) );
15718 if (epartIsReg(modrm
)) {
15719 UInt rE
= eregOfRexRM(pfx
,modrm
);
15720 assign( sV
, getXMMReg(rE
) );
15721 DIP("%sph%s %s,%s\n", isAvx
? "v" : "", str
,
15722 nameXMMReg(rE
), nameXMMReg(rG
));
15725 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15727 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
15728 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15729 DIP("%sph%s %s,%s\n", isAvx
? "v" : "", str
,
15730 dis_buf
, nameXMMReg(rG
));
15734 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
15735 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
15736 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
15737 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
15739 /* This isn't a particularly efficient way to compute the
15740 result, but at least it avoids a proliferation of IROps,
15741 hence avoids complication all the backends. */
15743 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
15745 binop(Iop_64HLtoV128
,
15747 binop(opCatE
,mkexpr(sHi
),mkexpr(sLo
)),
15748 binop(opCatO
,mkexpr(sHi
),mkexpr(sLo
)) ),
15750 binop(opCatE
,mkexpr(dHi
),mkexpr(dLo
)),
15751 binop(opCatO
,mkexpr(dHi
),mkexpr(dLo
)) ) ) );
15756 static Long
dis_PHADD_256 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
15759 IRTemp addr
= IRTemp_INVALID
;
15762 const HChar
* str
= "???";
15763 IROp opV64
= Iop_INVALID
;
15764 IROp opCatO
= Iop_CatOddLanes16x4
;
15765 IROp opCatE
= Iop_CatEvenLanes16x4
;
15766 IRTemp sV
= newTemp(Ity_V256
);
15767 IRTemp dV
= newTemp(Ity_V256
);
15768 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
15769 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
15770 UChar modrm
= getUChar(delta
);
15771 UInt rG
= gregOfRexRM(pfx
,modrm
);
15772 UInt rV
= getVexNvvvv(pfx
);
15775 case 0x01: opV64
= Iop_Add16x4
; str
= "addw"; break;
15776 case 0x02: opV64
= Iop_Add32x2
; str
= "addd"; break;
15777 case 0x03: opV64
= Iop_QAdd16Sx4
; str
= "addsw"; break;
15778 case 0x05: opV64
= Iop_Sub16x4
; str
= "subw"; break;
15779 case 0x06: opV64
= Iop_Sub32x2
; str
= "subd"; break;
15780 case 0x07: opV64
= Iop_QSub16Sx4
; str
= "subsw"; break;
15781 default: vassert(0);
15783 if (opc
== 0x02 || opc
== 0x06) {
15784 opCatO
= Iop_InterleaveHI32x2
;
15785 opCatE
= Iop_InterleaveLO32x2
;
15788 assign( dV
, getYMMReg(rV
) );
15790 if (epartIsReg(modrm
)) {
15791 UInt rE
= eregOfRexRM(pfx
,modrm
);
15792 assign( sV
, getYMMReg(rE
) );
15793 DIP("vph%s %s,%s\n", str
, nameYMMReg(rE
), nameYMMReg(rG
));
15796 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15797 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
15798 DIP("vph%s %s,%s\n", str
, dis_buf
, nameYMMReg(rG
));
15802 breakupV256to64s( dV
, &d3
, &d2
, &d1
, &d0
);
15803 breakupV256to64s( sV
, &s3
, &s2
, &s1
, &s0
);
15805 /* This isn't a particularly efficient way to compute the
15806 result, but at least it avoids a proliferation of IROps,
15807 hence avoids complication all the backends. */
15810 binop(Iop_V128HLtoV256
,
15811 binop(Iop_64HLtoV128
,
15813 binop(opCatE
,mkexpr(s3
),mkexpr(s2
)),
15814 binop(opCatO
,mkexpr(s3
),mkexpr(s2
)) ),
15816 binop(opCatE
,mkexpr(d3
),mkexpr(d2
)),
15817 binop(opCatO
,mkexpr(d3
),mkexpr(d2
)) ) ),
15818 binop(Iop_64HLtoV128
,
15820 binop(opCatE
,mkexpr(s1
),mkexpr(s0
)),
15821 binop(opCatO
,mkexpr(s1
),mkexpr(s0
)) ),
15823 binop(opCatE
,mkexpr(d1
),mkexpr(d0
)),
15824 binop(opCatO
,mkexpr(d1
),mkexpr(d0
)) ) ) ) );
static IRTemp math_PMADDUBSW_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_PwExtUSMulQAdd8x16, mkexpr(dV), mkexpr(sV)));
   return res;
}

static
IRTemp math_PMADDUBSW_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo );
   breakupV256toV128s( sV, &sHi, &sLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMADDUBSW_128(dHi, sHi)),
                     mkexpr(math_PMADDUBSW_128(dLo, sLo))));
   return res;
}
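/* For reference: Iop_PwExtUSMulQAdd8x16 expresses the PMADDUBSW rule
   directly: for each 16-bit output lane, the two unsigned bytes of dV are
   multiplied by the corresponding signed bytes of sV, the two products are
   added, and the sum is signed-saturated to 16 bits. */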
__attribute__((noinline))
static
Long dis_ESC_0F38__SupSSE3 ( Bool* decode_OK,
                             const VexAbiInfo* vbi,
                             Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   *decode_OK = False;

   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
15871 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
15872 if (have66noF2noF3(pfx
)
15873 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
15874 IRTemp sV
= newTemp(Ity_V128
);
15875 IRTemp dV
= newTemp(Ity_V128
);
15877 modrm
= getUChar(delta
);
15878 assign( dV
, getXMMReg(gregOfRexRM(pfx
,modrm
)) );
15880 if (epartIsReg(modrm
)) {
15881 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
15883 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
15884 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
15886 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15887 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
15888 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15890 DIP("pshufb %s,%s\n", dis_buf
,
15891 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
15894 IRTemp res
= math_PSHUFB_XMM( dV
, sV
);
15895 putXMMReg(gregOfRexRM(pfx
,modrm
), mkexpr(res
));
15896 goto decode_success
;
15898 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
15899 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
15900 IRTemp sV
= newTemp(Ity_I64
);
15901 IRTemp dV
= newTemp(Ity_I64
);
15903 modrm
= getUChar(delta
);
15905 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
15907 if (epartIsReg(modrm
)) {
15908 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
15910 DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
15911 nameMMXReg(gregLO3ofRM(modrm
)));
15913 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15914 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
15916 DIP("pshufb %s,%s\n", dis_buf
,
15917 nameMMXReg(gregLO3ofRM(modrm
)));
15921 gregLO3ofRM(modrm
),
15925 // Mask off bits [6:3] of each source operand lane
15926 binop(Iop_And64
, mkexpr(sV
), mkU64(0x8787878787878787ULL
))
15929 goto decode_success
;
15939 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
15941 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
15943 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
15944 xmm) and G to G (xmm). */
15945 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
15947 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
15949 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
15950 xmm) and G to G (xmm). */
15951 if (have66noF2noF3(pfx
)
15952 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
15953 delta
= dis_PHADD_128( vbi
, pfx
, delta
, False
/*isAvx*/, opc
);
15954 goto decode_success
;
15956 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
15957 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
15959 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
15961 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
15962 mmx) and G to G (mmx). */
15963 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
15965 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
15967 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
15968 mmx) and G to G (mmx). */
15969 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
15970 const HChar
* str
= "???";
15971 IROp opV64
= Iop_INVALID
;
15972 IROp opCatO
= Iop_CatOddLanes16x4
;
15973 IROp opCatE
= Iop_CatEvenLanes16x4
;
15974 IRTemp sV
= newTemp(Ity_I64
);
15975 IRTemp dV
= newTemp(Ity_I64
);
15977 modrm
= getUChar(delta
);
15980 case 0x01: opV64
= Iop_Add16x4
; str
= "addw"; break;
15981 case 0x02: opV64
= Iop_Add32x2
; str
= "addd"; break;
15982 case 0x03: opV64
= Iop_QAdd16Sx4
; str
= "addsw"; break;
15983 case 0x05: opV64
= Iop_Sub16x4
; str
= "subw"; break;
15984 case 0x06: opV64
= Iop_Sub32x2
; str
= "subd"; break;
15985 case 0x07: opV64
= Iop_QSub16Sx4
; str
= "subsw"; break;
15986 default: vassert(0);
15988 if (opc
== 0x02 || opc
== 0x06) {
15989 opCatO
= Iop_InterleaveHI32x2
;
15990 opCatE
= Iop_InterleaveLO32x2
;
15994 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
15996 if (epartIsReg(modrm
)) {
15997 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
15999 DIP("ph%s %s,%s\n", str
, nameMMXReg(eregLO3ofRM(modrm
)),
16000 nameMMXReg(gregLO3ofRM(modrm
)));
16002 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16003 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
16005 DIP("ph%s %s,%s\n", str
, dis_buf
,
16006 nameMMXReg(gregLO3ofRM(modrm
)));
16010 gregLO3ofRM(modrm
),
16012 binop(opCatE
,mkexpr(sV
),mkexpr(dV
)),
16013 binop(opCatO
,mkexpr(sV
),mkexpr(dV
))
16016 goto decode_success
;
16021 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
16022 Unsigned Bytes (XMM) */
16023 if (have66noF2noF3(pfx
)
16024 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
16025 IRTemp sV
= newTemp(Ity_V128
);
16026 IRTemp dV
= newTemp(Ity_V128
);
16027 modrm
= getUChar(delta
);
16028 UInt rG
= gregOfRexRM(pfx
,modrm
);
16030 assign( dV
, getXMMReg(rG
) );
16032 if (epartIsReg(modrm
)) {
16033 UInt rE
= eregOfRexRM(pfx
,modrm
);
16034 assign( sV
, getXMMReg(rE
) );
16036 DIP("pmaddubsw %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
16038 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16039 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
16040 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
16042 DIP("pmaddubsw %s,%s\n", dis_buf
, nameXMMReg(rG
));
16045 putXMMReg( rG
, mkexpr( math_PMADDUBSW_128( dV
, sV
) ) );
16046 goto decode_success
;
16048 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
16049 Unsigned Bytes (MMX) */
16050 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
16051 IRTemp sV
= newTemp(Ity_I64
);
16052 IRTemp dV
= newTemp(Ity_I64
);
16053 IRTemp sVoddsSX
= newTemp(Ity_I64
);
16054 IRTemp sVevensSX
= newTemp(Ity_I64
);
16055 IRTemp dVoddsZX
= newTemp(Ity_I64
);
16056 IRTemp dVevensZX
= newTemp(Ity_I64
);
16058 modrm
= getUChar(delta
);
16060 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
16062 if (epartIsReg(modrm
)) {
16063 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
16065 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
16066 nameMMXReg(gregLO3ofRM(modrm
)));
16068 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16069 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
16071 DIP("pmaddubsw %s,%s\n", dis_buf
,
16072 nameMMXReg(gregLO3ofRM(modrm
)));
16075 /* compute dV unsigned x sV signed */
16077 binop(Iop_SarN16x4
, mkexpr(sV
), mkU8(8)) );
16079 binop(Iop_SarN16x4
,
16080 binop(Iop_ShlN16x4
, mkexpr(sV
), mkU8(8)),
16083 binop(Iop_ShrN16x4
, mkexpr(dV
), mkU8(8)) );
16085 binop(Iop_ShrN16x4
,
16086 binop(Iop_ShlN16x4
, mkexpr(dV
), mkU8(8)),
16090 gregLO3ofRM(modrm
),
16091 binop(Iop_QAdd16Sx4
,
16092 binop(Iop_Mul16x4
, mkexpr(sVoddsSX
), mkexpr(dVoddsZX
)),
16093 binop(Iop_Mul16x4
, mkexpr(sVevensSX
), mkexpr(dVevensZX
))
16096 goto decode_success
;
16103 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
16104 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
16105 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
16106 if (have66noF2noF3(pfx
)
16107 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
16108 IRTemp sV
= newTemp(Ity_V128
);
16109 IRTemp dV
= newTemp(Ity_V128
);
16110 IRTemp sHi
= newTemp(Ity_I64
);
16111 IRTemp sLo
= newTemp(Ity_I64
);
16112 IRTemp dHi
= newTemp(Ity_I64
);
16113 IRTemp dLo
= newTemp(Ity_I64
);
16114 const HChar
* str
= "???";
16118 case 0x08: laneszB
= 1; str
= "b"; break;
16119 case 0x09: laneszB
= 2; str
= "w"; break;
16120 case 0x0A: laneszB
= 4; str
= "d"; break;
16121 default: vassert(0);
16124 modrm
= getUChar(delta
);
16125 assign( dV
, getXMMReg(gregOfRexRM(pfx
,modrm
)) );
16127 if (epartIsReg(modrm
)) {
16128 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
16130 DIP("psign%s %s,%s\n", str
, nameXMMReg(eregOfRexRM(pfx
,modrm
)),
16131 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16133 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16134 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
16135 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
16137 DIP("psign%s %s,%s\n", str
, dis_buf
,
16138 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16141 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
16142 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
16143 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
16144 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
16147 gregOfRexRM(pfx
,modrm
),
16148 binop(Iop_64HLtoV128
,
16149 dis_PSIGN_helper( mkexpr(sHi
), mkexpr(dHi
), laneszB
),
16150 dis_PSIGN_helper( mkexpr(sLo
), mkexpr(dLo
), laneszB
)
16153 goto decode_success
;
16155 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
16156 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
16157 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
16158 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
16159 IRTemp sV
= newTemp(Ity_I64
);
16160 IRTemp dV
= newTemp(Ity_I64
);
16161 const HChar
* str
= "???";
16165 case 0x08: laneszB
= 1; str
= "b"; break;
16166 case 0x09: laneszB
= 2; str
= "w"; break;
16167 case 0x0A: laneszB
= 4; str
= "d"; break;
16168 default: vassert(0);
16171 modrm
= getUChar(delta
);
16173 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
16175 if (epartIsReg(modrm
)) {
16176 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
16178 DIP("psign%s %s,%s\n", str
, nameMMXReg(eregLO3ofRM(modrm
)),
16179 nameMMXReg(gregLO3ofRM(modrm
)));
16181 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16182 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
16184 DIP("psign%s %s,%s\n", str
, dis_buf
,
16185 nameMMXReg(gregLO3ofRM(modrm
)));
16189 gregLO3ofRM(modrm
),
16190 dis_PSIGN_helper( mkexpr(sV
), mkexpr(dV
), laneszB
)
16192 goto decode_success
;
16197 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
16199 if (have66noF2noF3(pfx
)
16200 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
16201 IRTemp sV
= newTemp(Ity_V128
);
16202 IRTemp dV
= newTemp(Ity_V128
);
16203 IRTemp sHi
= newTemp(Ity_I64
);
16204 IRTemp sLo
= newTemp(Ity_I64
);
16205 IRTemp dHi
= newTemp(Ity_I64
);
16206 IRTemp dLo
= newTemp(Ity_I64
);
16208 modrm
= getUChar(delta
);
16209 assign( dV
, getXMMReg(gregOfRexRM(pfx
,modrm
)) );
16211 if (epartIsReg(modrm
)) {
16212 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
16214 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
16215 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16217 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16218 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
16219 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
16221 DIP("pmulhrsw %s,%s\n", dis_buf
,
16222 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16225 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
16226 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
16227 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
16228 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
16231 gregOfRexRM(pfx
,modrm
),
16232 binop(Iop_64HLtoV128
,
16233 dis_PMULHRSW_helper( mkexpr(sHi
), mkexpr(dHi
) ),
16234 dis_PMULHRSW_helper( mkexpr(sLo
), mkexpr(dLo
) )
16237 goto decode_success
;
16239 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
16241 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
16242 IRTemp sV
= newTemp(Ity_I64
);
16243 IRTemp dV
= newTemp(Ity_I64
);
16245 modrm
= getUChar(delta
);
16247 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
16249 if (epartIsReg(modrm
)) {
16250 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
16252 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
16253 nameMMXReg(gregLO3ofRM(modrm
)));
16255 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16256 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
16258 DIP("pmulhrsw %s,%s\n", dis_buf
,
16259 nameMMXReg(gregLO3ofRM(modrm
)));
16263 gregLO3ofRM(modrm
),
16264 dis_PMULHRSW_helper( mkexpr(sV
), mkexpr(dV
) )
16266 goto decode_success
;
16273 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
16274 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
16275 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
16276 if (have66noF2noF3(pfx
)
16277 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
16278 IRTemp sV
= newTemp(Ity_V128
);
16279 const HChar
* str
= "???";
16283 case 0x1C: laneszB
= 1; str
= "b"; break;
16284 case 0x1D: laneszB
= 2; str
= "w"; break;
16285 case 0x1E: laneszB
= 4; str
= "d"; break;
16286 default: vassert(0);
16289 modrm
= getUChar(delta
);
16290 if (epartIsReg(modrm
)) {
16291 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
16293 DIP("pabs%s %s,%s\n", str
, nameXMMReg(eregOfRexRM(pfx
,modrm
)),
16294 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16296 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16297 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
16298 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
16300 DIP("pabs%s %s,%s\n", str
, dis_buf
,
16301 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16304 putXMMReg( gregOfRexRM(pfx
,modrm
),
16305 mkexpr(math_PABS_XMM(sV
, laneszB
)) );
16306 goto decode_success
;
16308 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
16309 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
16310 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
16311 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
16312 IRTemp sV
= newTemp(Ity_I64
);
16313 const HChar
* str
= "???";
16317 case 0x1C: laneszB
= 1; str
= "b"; break;
16318 case 0x1D: laneszB
= 2; str
= "w"; break;
16319 case 0x1E: laneszB
= 4; str
= "d"; break;
16320 default: vassert(0);
16323 modrm
= getUChar(delta
);
16326 if (epartIsReg(modrm
)) {
16327 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
16329 DIP("pabs%s %s,%s\n", str
, nameMMXReg(eregLO3ofRM(modrm
)),
16330 nameMMXReg(gregLO3ofRM(modrm
)));
16332 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16333 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
16335 DIP("pabs%s %s,%s\n", str
, dis_buf
,
16336 nameMMXReg(gregLO3ofRM(modrm
)));
16339 putMMXReg( gregLO3ofRM(modrm
),
16340 mkexpr(math_PABS_MMX( sV
, laneszB
)) );
16341 goto decode_success
;
16351 *decode_OK
= False
;
/*------------------------------------------------------------*/
/*--- Top-level SSSE3: dis_ESC_0F3A__SupSSE3               ---*/
/*------------------------------------------------------------*/

__attribute__((noinline))
static
Long dis_ESC_0F3A__SupSSE3 ( Bool* decode_OK,
                             const VexAbiInfo* vbi,
                             Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   Long   d64   = 0;
   HChar  dis_buf[50];

   *decode_OK = False;

   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
16386 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
16387 if (have66noF2noF3(pfx
)
16388 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
16389 IRTemp sV
= newTemp(Ity_V128
);
16390 IRTemp dV
= newTemp(Ity_V128
);
16392 modrm
= getUChar(delta
);
16393 assign( dV
, getXMMReg(gregOfRexRM(pfx
,modrm
)) );
16395 if (epartIsReg(modrm
)) {
16396 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
16397 d64
= (Long
)getUChar(delta
+1);
16399 DIP("palignr $%lld,%s,%s\n", d64
,
16400 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
16401 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16403 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
16404 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
16405 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
16406 d64
= (Long
)getUChar(delta
+alen
);
16408 DIP("palignr $%lld,%s,%s\n", d64
,
16410 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16413 IRTemp res
= math_PALIGNR_XMM( sV
, dV
, d64
);
16414 putXMMReg( gregOfRexRM(pfx
,modrm
), mkexpr(res
) );
16415 goto decode_success
;
16417 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
16418 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
16419 IRTemp sV
= newTemp(Ity_I64
);
16420 IRTemp dV
= newTemp(Ity_I64
);
16421 IRTemp res
= newTemp(Ity_I64
);
16423 modrm
= getUChar(delta
);
16425 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
16427 if (epartIsReg(modrm
)) {
16428 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
16429 d64
= (Long
)getUChar(delta
+1);
16431 DIP("palignr $%lld,%s,%s\n", d64
,
16432 nameMMXReg(eregLO3ofRM(modrm
)),
16433 nameMMXReg(gregLO3ofRM(modrm
)));
16435 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
16436 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
16437 d64
= (Long
)getUChar(delta
+alen
);
16439 DIP("palignr $%lld%s,%s\n", d64
,
16441 nameMMXReg(gregLO3ofRM(modrm
)));
16445 assign( res
, mkexpr(sV
) );
16447 else if (d64
>= 1 && d64
<= 7) {
16450 binop(Iop_Shr64
, mkexpr(sV
), mkU8(8*d64
)),
16451 binop(Iop_Shl64
, mkexpr(dV
), mkU8(8*(8-d64
))
16454 else if (d64
== 8) {
16455 assign( res
, mkexpr(dV
) );
16457 else if (d64
>= 9 && d64
<= 15) {
16458 assign( res
, binop(Iop_Shr64
, mkexpr(dV
), mkU8(8*(d64
-8))) );
16460 else if (d64
>= 16 && d64
<= 255) {
16461 assign( res
, mkU64(0) );
16466 putMMXReg( gregLO3ofRM(modrm
), mkexpr(res
) );
16467 goto decode_success
;
16477 *decode_OK
= False
;
/*------------------------------------------------------------*/
/*--- Top-level SSE4: dis_ESC_0F__SSE4                     ---*/
/*------------------------------------------------------------*/

__attribute__((noinline))
static
Long dis_ESC_0F__SSE4 ( Bool* decode_OK,
                        const VexArchInfo* archinfo,
                        const VexAbiInfo* vbi,
                        Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr  = IRTemp_INVALID;
   IRType ty    = Ity_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   *decode_OK = False;

   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
      /* F3 0F B8  = POPCNT{W,L,Q}
         Count the number of 1 bits in a register
      */
      if (haveF3noF2(pfx) /* so both 66 and REX.W are possibilities */
          && (sz == 2 || sz == 4 || sz == 8)) {
         /*IRType*/ ty  = szToITy(sz);
         IRTemp     src = newTemp(ty);
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            assign(src, getIRegE(sz, pfx, modrm));
            delta += 1;
            DIP("popcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
                nameIRegG(sz, pfx, modrm));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
            assign(src, loadLE(ty, mkexpr(addr)));
            delta += alen;
            DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf,
                nameIRegG(sz, pfx, modrm));
         }

         IRTemp result = gen_POPCOUNT(ty, src);
         putIRegG(sz, pfx, modrm, mkexpr(result));

         // Update flags.  This is pretty lame .. perhaps can do better
         // if this turns out to be performance critical.
         // O S A C P are cleared.  Z is set if SRC == 0.
         stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
         stmt( IRStmt_Put( OFFB_CC_DEP1,
               binop(Iop_Shl64,
                     unop(Iop_1Uto64,
                          binop(Iop_CmpEQ64,
                                widenUto64(mkexpr(src)),
                                mkU64(0))),
                     mkU8(AMD64G_CC_SHIFT_Z))));

         goto decode_success;
      }
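      /* For reference: after POPCNT the only flag result modelled here is
         Z, which the CC_DEP1 expression above sets iff the source operand
         was zero; the other OSZACP bits are cleared via the CC_OP_COPY
         thunk. */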
16556 /* F3 0F BC -- TZCNT (count trailing zeroes. A BMI extension,
16557 which we can only decode if we're sure this is a BMI1 capable cpu
16558 that supports TZCNT, since otherwise it's BSF, which behaves
16559 differently on zero source. */
16560 if (haveF3noF2(pfx
) /* so both 66 and 48 are possibilities */
16561 && (sz
== 2 || sz
== 4 || sz
== 8)
16562 && 0 != (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_BMI
)) {
16563 /*IRType*/ ty
= szToITy(sz
);
16564 IRTemp src
= newTemp(ty
);
16565 modrm
= getUChar(delta
);
16566 if (epartIsReg(modrm
)) {
16567 assign(src
, getIRegE(sz
, pfx
, modrm
));
16569 DIP("tzcnt%c %s, %s\n", nameISize(sz
), nameIRegE(sz
, pfx
, modrm
),
16570 nameIRegG(sz
, pfx
, modrm
));
16572 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0);
16573 assign(src
, loadLE(ty
, mkexpr(addr
)));
16575 DIP("tzcnt%c %s, %s\n", nameISize(sz
), dis_buf
,
16576 nameIRegG(sz
, pfx
, modrm
));
16579 IRTemp res
= gen_TZCNT(ty
, src
);
16580 putIRegG(sz
, pfx
, modrm
, mkexpr(res
));
16582 // Update flags. This is pretty lame .. perhaps can do better
16583 // if this turns out to be performance critical.
16584 // O S A P are cleared. Z is set if RESULT == 0.
16585 // C is set if SRC is zero.
16586 IRTemp src64
= newTemp(Ity_I64
);
16587 IRTemp res64
= newTemp(Ity_I64
);
16588 assign(src64
, widenUto64(mkexpr(src
)));
16589 assign(res64
, widenUto64(mkexpr(res
)));
16591 IRTemp oszacp
= newTemp(Ity_I64
);
16597 binop(Iop_CmpEQ64
, mkexpr(res64
), mkU64(0))),
16598 mkU8(AMD64G_CC_SHIFT_Z
)),
16601 binop(Iop_CmpEQ64
, mkexpr(src64
), mkU64(0))),
16602 mkU8(AMD64G_CC_SHIFT_C
))
16606 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
16607 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
16608 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
16609 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(oszacp
) ));
16611 goto decode_success
;
   case 0xBD:
      /* F3 0F BD -- LZCNT (count leading zeroes.  An AMD extension,
         which we can only decode if we're sure this is an AMD cpu
         that supports LZCNT, since otherwise it's BSR, which behaves
         differently.  Bizarrely, my Sandy Bridge also accepts these
         instructions but produces different results. */
      if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
          && (sz == 2 || sz == 4 || sz == 8)
          && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) {
         /*IRType*/ ty  = szToITy(sz);
         IRTemp     src = newTemp(ty);
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            assign(src, getIRegE(sz, pfx, modrm));
            delta += 1;
            DIP("lzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
                nameIRegG(sz, pfx, modrm));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
            assign(src, loadLE(ty, mkexpr(addr)));
            delta += alen;
            DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
                nameIRegG(sz, pfx, modrm));
         }

         IRTemp res = gen_LZCNT(ty, src);
         putIRegG(sz, pfx, modrm, mkexpr(res));

         // Update flags.  This is pretty lame .. perhaps can do better
         // if this turns out to be performance critical.
         // O S A P are cleared.  Z is set if RESULT == 0.
         // C is set if SRC is zero.
         IRTemp src64 = newTemp(Ity_I64);
         IRTemp res64 = newTemp(Ity_I64);
         assign(src64, widenUto64(mkexpr(src)));
         assign(res64, widenUto64(mkexpr(res)));

         IRTemp oszacp = newTemp(Ity_I64);
         assign(
            oszacp,
            binop(Iop_Or64,
                  binop(Iop_Shl64,
                        unop(Iop_1Uto64,
                             binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
                        mkU8(AMD64G_CC_SHIFT_Z)),
                  binop(Iop_Shl64,
                        unop(Iop_1Uto64,
                             binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
                        mkU8(AMD64G_CC_SHIFT_C))
            )
         );

         stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
         stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));

         goto decode_success;
      }
      break;
   default:
      break;

   }

   *decode_OK = False;
   return deltaIN;

  decode_success:
   *decode_OK = True;
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level SSE4: dis_ESC_0F38__SSE4                   ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/
static IRTemp math_PBLENDVB_128 ( IRTemp vecE, IRTemp vecG,
                                  IRTemp vec0/*controlling mask*/,
                                  UInt gran, IROp opSAR )
{
   /* The tricky bit is to convert vec0 into a suitable mask, by
      copying the most significant bit of each lane into all positions
      in the lane. */
   IRTemp sh = newTemp(Ity_I8);
   assign(sh, mkU8(8 * gran - 1));

   IRTemp mask = newTemp(Ity_V128);
   assign(mask, binop(opSAR, mkexpr(vec0), mkexpr(sh)));

   IRTemp notmask = newTemp(Ity_V128);
   assign(notmask, unop(Iop_NotV128, mkexpr(mask)));

   IRTemp res = newTemp(Ity_V128);
   assign(res,  binop(Iop_OrV128,
                      binop(Iop_AndV128, mkexpr(vecE), mkexpr(mask)),
                      binop(Iop_AndV128, mkexpr(vecG), mkexpr(notmask))));
   return res;
}
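/* Worked example of the mask construction in math_PBLENDVB_128: with
   gran == 1 and opSAR == Iop_SarN8x16, a control byte with its MSB set
   (0x80..0xFF) arithmetic-shifts right by 7 to 0xFF, selecting that lane
   from vecE, while a byte with MSB clear shifts to 0x00, selecting the
   lane from vecG -- exactly the "sign bit of each lane selects" semantics
   of PBLENDVB / BLENDVPS / BLENDVPD. */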
static IRTemp math_PBLENDVB_256 ( IRTemp vecE, IRTemp vecG,
                                  IRTemp vec0/*controlling mask*/,
                                  UInt gran, IROp opSAR128 )
{
   /* The tricky bit is to convert vec0 into a suitable mask, by
      copying the most significant bit of each lane into all positions
      in the lane. */
   IRTemp sh = newTemp(Ity_I8);
   assign(sh, mkU8(8 * gran - 1));

   IRTemp vec0Hi = IRTemp_INVALID;
   IRTemp vec0Lo = IRTemp_INVALID;
   breakupV256toV128s( vec0, &vec0Hi, &vec0Lo );

   IRTemp mask = newTemp(Ity_V256);
   assign(mask, binop(Iop_V128HLtoV256,
                      binop(opSAR128, mkexpr(vec0Hi), mkexpr(sh)),
                      binop(opSAR128, mkexpr(vec0Lo), mkexpr(sh))));

   IRTemp notmask = newTemp(Ity_V256);
   assign(notmask, unop(Iop_NotV256, mkexpr(mask)));

   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_OrV256,
                     binop(Iop_AndV256, mkexpr(vecE), mkexpr(mask)),
                     binop(Iop_AndV256, mkexpr(vecG), mkexpr(notmask))));
   return res;
}
static Long dis_VBLENDV_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
                              const HChar *name, UInt gran, IROp opSAR )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);
   UInt   rV     = getVexNvvvv(pfx);
   UInt   rIS4   = 0xFF; /* invalid */
   IRTemp vecE   = newTemp(Ity_V128);
   IRTemp vecV   = newTemp(Ity_V128);
   IRTemp vecIS4 = newTemp(Ity_V128);
   if (epartIsReg(modrm)) {
      delta++;
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(vecE, getXMMReg(rE));
      UChar ib = getUChar(delta);
      rIS4 = (ib >> 4) & 0xF;
      DIP("%s %s,%s,%s,%s\n",
          name, nameXMMReg(rIS4), nameXMMReg(rE),
          nameXMMReg(rV), nameXMMReg(rG));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      delta += alen;
      assign(vecE, loadLE(Ity_V128, mkexpr(addr)));
      UChar ib = getUChar(delta);
      rIS4 = (ib >> 4) & 0xF;
      DIP("%s %s,%s,%s,%s\n",
          name, nameXMMReg(rIS4), dis_buf, nameXMMReg(rV), nameXMMReg(rG));
   }
   delta++;
   assign(vecV,   getXMMReg(rV));
   assign(vecIS4, getXMMReg(rIS4));
   IRTemp res = math_PBLENDVB_128( vecE, vecV, vecIS4, gran, opSAR );
   putYMMRegLoAndZU( rG, mkexpr(res) );
   return delta;
}
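/* Note on the trailing "1" passed to disAMode in dis_VBLENDV_128 (and its
   256-bit sibling below): the VBLENDV forms carry an is4 immediate byte
   after the amode (the fourth register number lives in its bits 7:4), and
   disAMode needs to know how many immediate bytes follow so that
   %rip-relative addresses, which are relative to the end of the whole
   instruction, come out right. */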
static Long dis_VBLENDV_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
                              const HChar *name, UInt gran, IROp opSAR128 )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);
   UInt   rV     = getVexNvvvv(pfx);
   UInt   rIS4   = 0xFF; /* invalid */
   IRTemp vecE   = newTemp(Ity_V256);
   IRTemp vecV   = newTemp(Ity_V256);
   IRTemp vecIS4 = newTemp(Ity_V256);
   if (epartIsReg(modrm)) {
      delta++;
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(vecE, getYMMReg(rE));
      UChar ib = getUChar(delta);
      rIS4 = (ib >> 4) & 0xF;
      DIP("%s %s,%s,%s,%s\n",
          name, nameYMMReg(rIS4), nameYMMReg(rE),
          nameYMMReg(rV), nameYMMReg(rG));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      delta += alen;
      assign(vecE, loadLE(Ity_V256, mkexpr(addr)));
      UChar ib = getUChar(delta);
      rIS4 = (ib >> 4) & 0xF;
      DIP("%s %s,%s,%s,%s\n",
          name, nameYMMReg(rIS4), dis_buf, nameYMMReg(rV), nameYMMReg(rG));
   }
   delta++;
   assign(vecV,   getYMMReg(rV));
   assign(vecIS4, getYMMReg(rIS4));
   IRTemp res = math_PBLENDVB_256( vecE, vecV, vecIS4, gran, opSAR128 );
   putYMMReg( rG, mkexpr(res) );
   return delta;
}
static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign )
{
   /* Set Z=1 iff (vecE & vecG) == 0--(128)--0
      Set C=1 iff (vecE & not vecG) == 0--(128)--0

      For the case `sign == 0`, be careful to use only IROps that can be
      instrumented exactly by memcheck.  This is because PTEST is used for
      __builtin_strcmp in gcc14.  See
      https://bugzilla.redhat.com/show_bug.cgi?id=2257546
   */

   /* andV, andnV:  vecE & vecG,  vecE and not(vecG) */

   /* andV resp. andnV, are reduced to 64-bit values by or-ing the top
      and bottom 64-bits together.  It relies on this trick:

      InterleaveLO64x2([a,b],[c,d]) == [b,d]    hence

      InterleaveLO64x2([a,b],[a,b]) == [b,b]    and similarly
      InterleaveHI64x2([a,b],[a,b]) == [a,a]

      and so the OR of the above 2 exprs produces
      [a OR b, a OR b], from which we simply take the lower half.
   */
   IRTemp and64  = newTemp(Ity_I64);
   IRTemp andn64 = newTemp(Ity_I64);

   assign(and64,
          unop(Iop_V128to64,
               binop(Iop_OrV128,
                     binop(Iop_InterleaveLO64x2,
                           mkexpr(andV), mkexpr(andV)),
                     binop(Iop_InterleaveHI64x2,
                           mkexpr(andV), mkexpr(andV)))));

   assign(andn64,
          unop(Iop_V128to64,
               binop(Iop_OrV128,
                     binop(Iop_InterleaveLO64x2,
                           mkexpr(andnV), mkexpr(andnV)),
                     binop(Iop_InterleaveHI64x2,
                           mkexpr(andnV), mkexpr(andnV)))));

   // Make z64 and c64 be either all-0s or all-1s
   IRTemp z64 = newTemp(Ity_I64);
   IRTemp c64 = newTemp(Ity_I64);

   if (sign == 64) {
      /* When only interested in the most significant bit, just copy bit 63
         into all bit positions, then invert. */
      assign(z64,
             unop(Iop_Not64,
                  binop(Iop_Sar64, mkexpr(and64), mkU8(63))));

      assign(c64,
             unop(Iop_Not64,
                  binop(Iop_Sar64, mkexpr(andn64), mkU8(63))));
   } else if (sign == 32) {
      /* If we're interested into bits 63 and 31, OR bit 31 into bit 63, copy
         bit 63 into all bit positions, then invert. */
      IRTemp and3264 = newTemp(Ity_I64);
      assign(and3264, binop(Iop_Or64, mkexpr(and64),
                            binop(Iop_Shl64, mkexpr(and64), mkU8(32))));
      assign(z64,
             unop(Iop_Not64,
                  binop(Iop_Sar64, mkexpr(and3264), mkU8(63))));

      IRTemp andn3264 = newTemp(Ity_I64);
      assign(andn3264, binop(Iop_Or64, mkexpr(andn64),
                             binop(Iop_Shl64, mkexpr(andn64), mkU8(32))));
      assign(c64,
             unop(Iop_Not64,
                  binop(Iop_Sar64, mkexpr(andn3264), mkU8(63))));
   } else {
      vassert(sign == 0);
      assign(z64, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(and64), mkU64(0)),
                             mkU64(~0ULL), mkU64(0ULL)));
      assign(c64, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(andn64), mkU64(0)),
                             mkU64(~0ULL), mkU64(0ULL)));
   }

   /* And finally, slice out the Z and C flags and set the flags
      thunk to COPY for them.  OSAP are set to zero. */
   IRTemp newOSZACP = newTemp(Ity_I64);
   assign(newOSZACP,
          binop(Iop_Or64,
                binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)),
                binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C))));

   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP)));
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
}
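/* Reading the sign == 32 case in finish_xTESTy above: after the 128-to-64
   reduction, bit 63 of and64 is the OR of the sign bits of dword lanes 1
   and 3 of the original AND, and bit 31 is the OR of those of lanes 0 and
   2.  OR-ing bit 31 up into bit 63 and then doing Sar64-by-63 followed by
   Not therefore yields all-ones exactly when every 32-bit lane's sign bit
   was clear, which is the VTESTPS condition for ZF (and likewise CF for
   the and-not value). */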
/* Handles 128 bit versions of PTEST, VTESTPS or VTESTPD.
   sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
static Long dis_xTESTy_128 ( const VexAbiInfo* vbi, Prefix pfx,
                             Long delta, Bool isAvx, Int sign )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);
   IRTemp vecE = newTemp(Ity_V128);
   IRTemp vecG = newTemp(Ity_V128);

   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(vecE, getXMMReg(rE));
      delta += 1;
      DIP( "%s%stest%s %s,%s\n",
           isAvx ? "v" : "", sign == 0 ? "p" : "",
           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
           nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      if (!isAvx)
         gen_SIGNAL_if_not_16_aligned( vbi, addr );
      assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
      delta += alen;
      DIP( "%s%stest%s %s,%s\n",
           isAvx ? "v" : "", sign == 0 ? "p" : "",
           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
           dis_buf, nameXMMReg(rG) );
   }

   assign(vecG, getXMMReg(rG));

   /* Set Z=1 iff (vecE & vecG) == 0
      Set C=1 iff (vecE & not vecG) == 0
   */

   /* andV, andnV:  vecE & vecG,  vecE and not(vecG) */
   IRTemp andV  = newTemp(Ity_V128);
   IRTemp andnV = newTemp(Ity_V128);
   assign(andV,  binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
   assign(andnV, binop(Iop_AndV128,
                       mkexpr(vecE), unop(Iop_NotV128, mkexpr(vecG))));

   finish_xTESTy ( andV, andnV, sign );
   return delta;
}
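/* The 16-alignment check in dis_xTESTy_128 is applied only on the non-AVX
   (PTEST) path: legacy-SSE 128-bit memory operands must be 16-aligned and
   fault otherwise, whereas the VEX-encoded forms carry no such
   requirement, so the check is skipped when isAvx is set. */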
/* Handles 256 bit versions of PTEST, VTESTPS or VTESTPD.
   sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
static Long dis_xTESTy_256 ( const VexAbiInfo* vbi, Prefix pfx,
                             Long delta, Int sign )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);
   IRTemp vecE = newTemp(Ity_V256);
   IRTemp vecG = newTemp(Ity_V256);

   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(vecE, getYMMReg(rE));
      delta += 1;
      DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
           nameYMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(vecE, loadLE( Ity_V256, mkexpr(addr) ));
      delta += alen;
      DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
           dis_buf, nameYMMReg(rG) );
   }

   assign(vecG, getYMMReg(rG));

   /* Set Z=1 iff (vecE & vecG) == 0
      Set C=1 iff (vecE & not vecG) == 0
   */

   /* andV, andnV:  vecE & vecG,  vecE and not(vecG) */
   IRTemp andV  = newTemp(Ity_V256);
   IRTemp andnV = newTemp(Ity_V256);
   assign(andV,  binop(Iop_AndV256, mkexpr(vecE), mkexpr(vecG)));
   assign(andnV, binop(Iop_AndV256,
                       mkexpr(vecE), unop(Iop_NotV256, mkexpr(vecG))));

   IRTemp andVhi  = IRTemp_INVALID;
   IRTemp andVlo  = IRTemp_INVALID;
   IRTemp andnVhi = IRTemp_INVALID;
   IRTemp andnVlo = IRTemp_INVALID;
   breakupV256toV128s( andV, &andVhi, &andVlo );
   breakupV256toV128s( andnV, &andnVhi, &andnVlo );

   IRTemp andV128  = newTemp(Ity_V128);
   IRTemp andnV128 = newTemp(Ity_V128);
   assign( andV128,  binop( Iop_OrV128, mkexpr(andVhi),  mkexpr(andVlo) ) );
   assign( andnV128, binop( Iop_OrV128, mkexpr(andnVhi), mkexpr(andnVlo) ) );

   finish_xTESTy ( andV128, andnV128, sign );
   return delta;
}
/* Handles 128 and 256 bit versions of VCVTPH2PS. */
static Long dis_VCVTPH2PS ( const VexAbiInfo* vbi, Prefix pfx,
                            Long delta, Bool is256bit )
{
   /* This is a width-doubling load or reg-reg move, that does conversion on the
      transferred data. */
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx, modrm);
   IRTemp srcE  = newTemp(is256bit ? Ity_V128 : Ity_I64);

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(srcE, is256bit ? unop(Iop_V256toV128_0, getYMMReg(rE))
                            : unop(Iop_V128to64, getXMMReg(rE)));
      delta += 1;
      DIP("vcvtph2ps %s,%s\n", nameXMMReg(rE),
          (is256bit ? nameYMMReg : nameXMMReg)(rG));
   } else {
      Int    alen = 0;
      HChar  dis_buf[50];
      IRTemp addr = disAMode(&alen, vbi, pfx, delta, dis_buf, 0);
      // I don't think we need an alignment check here (not 100% sure tho.)
      assign(srcE, loadLE(is256bit ? Ity_V128 : Ity_I64, mkexpr(addr)));
      delta += alen;
      DIP( "vcvtph2ps %s,%s\n", dis_buf,
           (is256bit ? nameYMMReg : nameXMMReg)(rG));
   }

   IRExpr* res = unop(is256bit ? Iop_F16toF32x8 : Iop_F16toF32x4, mkexpr(srcE));
   (is256bit ? putYMMReg : putYMMRegLoAndZU)(rG, res);

   return delta;
}
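/* No rounding mode is passed to Iop_F16toF32x4/x8 above: every binary16
   value is exactly representable as a binary32, so this widening
   conversion cannot round and needs no mode operand (the narrowing
   direction, by contrast, does have to round). */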
/* Handles 128 bit versions of PMOVZXBW and PMOVSXBW. */
static Long dis_PMOVxXBW_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   const HChar  how = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      delta += 1;
      DIP( "%spmov%cxbw %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec,
              unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
      delta += alen;
      DIP( "%spmov%cxbw %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
   }

   IRExpr* res
      = xIsZ /* do math for either zero or sign extend */
        ? binop( Iop_InterleaveLO8x16,
                 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
        : binop( Iop_SarN16x8,
                 binop( Iop_ShlN16x8,
                        binop( Iop_InterleaveLO8x16,
                               IRExpr_Const( IRConst_V128(0) ),
                               mkexpr(srcVec) ),
                        mkU8(8) ),
                 mkU8(8) );

   (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );

   return delta;
}
/* Handles 256 bit versions of PMOVZXBW and PMOVSXBW. */
static Long dis_PMOVxXBW_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   UChar  how    = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      delta += 1;
      DIP( "vpmov%cxbw %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) );
      delta += alen;
      DIP( "vpmov%cxbw %s,%s\n", how, dis_buf, nameYMMReg(rG) );
   }

   /* First do zero extend.  */
   IRExpr* res
      = binop( Iop_V128HLtoV256,
               binop( Iop_InterleaveHI8x16,
                      IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
               binop( Iop_InterleaveLO8x16,
                      IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
   /* And if needed sign extension as well.  */
   if (!xIsZ)
      res = binop( Iop_SarN16x16,
                   binop( Iop_ShlN16x16, res, mkU8(8) ), mkU8(8) );

   putYMMReg ( rG, res );

   return delta;
}
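/* The interleave-then-shift idiom used by both PMOVxXBW routines, in
   short: interleaving each source byte below a zero byte zero-extends it
   to 16 bits (0x80 becomes 0x0080); for the sign-extending variant, the
   ShlN16 by 8 moves the byte into the high half (0x8000) and the
   arithmetic SarN16 by 8 then replicates its sign bit back down, giving
   0xFF80, i.e. a correct sign extension. */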
17152 static Long
dis_PMOVxXWD_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17153 Long delta
, Bool isAvx
, Bool xIsZ
)
17155 IRTemp addr
= IRTemp_INVALID
;
17158 IRTemp srcVec
= newTemp(Ity_V128
);
17159 UChar modrm
= getUChar(delta
);
17160 const HChar
* mbV
= isAvx
? "v" : "";
17161 const HChar how
= xIsZ
? 'z' : 's';
17162 UInt rG
= gregOfRexRM(pfx
, modrm
);
17164 if ( epartIsReg(modrm
) ) {
17165 UInt rE
= eregOfRexRM(pfx
, modrm
);
17166 assign( srcVec
, getXMMReg(rE
) );
17168 DIP( "%spmov%cxwd %s,%s\n", mbV
, how
, nameXMMReg(rE
), nameXMMReg(rG
) );
17170 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17172 unop( Iop_64UtoV128
, loadLE( Ity_I64
, mkexpr(addr
) ) ) );
17174 DIP( "%spmov%cxwd %s,%s\n", mbV
, how
, dis_buf
, nameXMMReg(rG
) );
17178 = binop( Iop_InterleaveLO16x8
,
17179 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) );
17181 res
= binop(Iop_SarN32x4
,
17182 binop(Iop_ShlN32x4
, res
, mkU8(16)), mkU8(16));
17184 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
17185 ( gregOfRexRM(pfx
, modrm
), res
);
17191 static Long
dis_PMOVxXWD_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17192 Long delta
, Bool xIsZ
)
17194 IRTemp addr
= IRTemp_INVALID
;
17197 IRTemp srcVec
= newTemp(Ity_V128
);
17198 UChar modrm
= getUChar(delta
);
17199 UChar how
= xIsZ
? 'z' : 's';
17200 UInt rG
= gregOfRexRM(pfx
, modrm
);
17202 if ( epartIsReg(modrm
) ) {
17203 UInt rE
= eregOfRexRM(pfx
, modrm
);
17204 assign( srcVec
, getXMMReg(rE
) );
17206 DIP( "vpmov%cxwd %s,%s\n", how
, nameXMMReg(rE
), nameYMMReg(rG
) );
17208 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17209 assign( srcVec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
17211 DIP( "vpmov%cxwd %s,%s\n", how
, dis_buf
, nameYMMReg(rG
) );
17215 = binop( Iop_V128HLtoV256
,
17216 binop( Iop_InterleaveHI16x8
,
17217 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) ),
17218 binop( Iop_InterleaveLO16x8
,
17219 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) ) );
17221 res
= binop(Iop_SarN32x8
,
17222 binop(Iop_ShlN32x8
, res
, mkU8(16)), mkU8(16));
17224 putYMMReg ( rG
, res
);
17230 static Long
dis_PMOVSXWQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17231 Long delta
, Bool isAvx
)
17233 IRTemp addr
= IRTemp_INVALID
;
17236 IRTemp srcBytes
= newTemp(Ity_I32
);
17237 UChar modrm
= getUChar(delta
);
17238 const HChar
* mbV
= isAvx
? "v" : "";
17239 UInt rG
= gregOfRexRM(pfx
, modrm
);
17241 if ( epartIsReg( modrm
) ) {
17242 UInt rE
= eregOfRexRM(pfx
, modrm
);
17243 assign( srcBytes
, getXMMRegLane32( rE
, 0 ) );
17245 DIP( "%spmovsxwq %s,%s\n", mbV
, nameXMMReg(rE
), nameXMMReg(rG
) );
17247 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17248 assign( srcBytes
, loadLE( Ity_I32
, mkexpr(addr
) ) );
17250 DIP( "%spmovsxwq %s,%s\n", mbV
, dis_buf
, nameXMMReg(rG
) );
17253 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
17254 ( rG
, binop( Iop_64HLtoV128
,
17256 unop( Iop_32HIto16
, mkexpr(srcBytes
) ) ),
17258 unop( Iop_32to16
, mkexpr(srcBytes
) ) ) ) );
17263 static Long
dis_PMOVSXWQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
)
17265 IRTemp addr
= IRTemp_INVALID
;
17268 IRTemp srcBytes
= newTemp(Ity_I64
);
17269 UChar modrm
= getUChar(delta
);
17270 UInt rG
= gregOfRexRM(pfx
, modrm
);
17271 IRTemp s3
, s2
, s1
, s0
;
17272 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
17274 if ( epartIsReg( modrm
) ) {
17275 UInt rE
= eregOfRexRM(pfx
, modrm
);
17276 assign( srcBytes
, getXMMRegLane64( rE
, 0 ) );
17278 DIP( "vpmovsxwq %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
) );
17280 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17281 assign( srcBytes
, loadLE( Ity_I64
, mkexpr(addr
) ) );
17283 DIP( "vpmovsxwq %s,%s\n", dis_buf
, nameYMMReg(rG
) );
17286 breakup64to16s( srcBytes
, &s3
, &s2
, &s1
, &s0
);
17287 putYMMReg( rG
, binop( Iop_V128HLtoV256
,
17288 binop( Iop_64HLtoV128
,
17289 unop( Iop_16Sto64
, mkexpr(s3
) ),
17290 unop( Iop_16Sto64
, mkexpr(s2
) ) ),
17291 binop( Iop_64HLtoV128
,
17292 unop( Iop_16Sto64
, mkexpr(s1
) ),
17293 unop( Iop_16Sto64
, mkexpr(s0
) ) ) ) );
17298 static Long
dis_PMOVZXWQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17299 Long delta
, Bool isAvx
)
17301 IRTemp addr
= IRTemp_INVALID
;
17304 IRTemp srcVec
= newTemp(Ity_V128
);
17305 UChar modrm
= getUChar(delta
);
17306 const HChar
* mbV
= isAvx
? "v" : "";
17307 UInt rG
= gregOfRexRM(pfx
, modrm
);
17309 if ( epartIsReg( modrm
) ) {
17310 UInt rE
= eregOfRexRM(pfx
, modrm
);
17311 assign( srcVec
, getXMMReg(rE
) );
17313 DIP( "%spmovzxwq %s,%s\n", mbV
, nameXMMReg(rE
), nameXMMReg(rG
) );
17315 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17317 unop( Iop_32UtoV128
, loadLE( Ity_I32
, mkexpr(addr
) ) ) );
17319 DIP( "%spmovzxwq %s,%s\n", mbV
, dis_buf
, nameXMMReg(rG
) );
17322 IRTemp zeroVec
= newTemp( Ity_V128
);
17323 assign( zeroVec
, IRExpr_Const( IRConst_V128(0) ) );
17325 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
17326 ( rG
, binop( Iop_InterleaveLO16x8
,
17328 binop( Iop_InterleaveLO16x8
,
17329 mkexpr(zeroVec
), mkexpr(srcVec
) ) ) );
17334 static Long
dis_PMOVZXWQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17337 IRTemp addr
= IRTemp_INVALID
;
17340 IRTemp srcVec
= newTemp(Ity_V128
);
17341 UChar modrm
= getUChar(delta
);
17342 UInt rG
= gregOfRexRM(pfx
, modrm
);
17344 if ( epartIsReg( modrm
) ) {
17345 UInt rE
= eregOfRexRM(pfx
, modrm
);
17346 assign( srcVec
, getXMMReg(rE
) );
17348 DIP( "vpmovzxwq %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
) );
17350 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17352 unop( Iop_64UtoV128
, loadLE( Ity_I64
, mkexpr(addr
) ) ) );
17354 DIP( "vpmovzxwq %s,%s\n", dis_buf
, nameYMMReg(rG
) );
17357 IRTemp zeroVec
= newTemp( Ity_V128
);
17358 assign( zeroVec
, IRExpr_Const( IRConst_V128(0) ) );
17360 putYMMReg( rG
, binop( Iop_V128HLtoV256
,
17361 binop( Iop_InterleaveHI16x8
,
17363 binop( Iop_InterleaveLO16x8
,
17364 mkexpr(zeroVec
), mkexpr(srcVec
) ) ),
17365 binop( Iop_InterleaveLO16x8
,
17367 binop( Iop_InterleaveLO16x8
,
17368 mkexpr(zeroVec
), mkexpr(srcVec
) ) ) ) );
17373 /* Handles 128 bit versions of PMOVZXDQ and PMOVSXDQ. */
17374 static Long
dis_PMOVxXDQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17375 Long delta
, Bool isAvx
, Bool xIsZ
)
17377 IRTemp addr
= IRTemp_INVALID
;
17380 IRTemp srcI64
= newTemp(Ity_I64
);
17381 IRTemp srcVec
= newTemp(Ity_V128
);
17382 UChar modrm
= getUChar(delta
);
17383 const HChar
* mbV
= isAvx
? "v" : "";
17384 const HChar how
= xIsZ
? 'z' : 's';
17385 UInt rG
= gregOfRexRM(pfx
, modrm
);
17386 /* Compute both srcI64 -- the value to expand -- and srcVec -- same
17387 thing in a V128, with arbitrary junk in the top 64 bits. Use
17388 one or both of them and let iropt clean up afterwards (as
17390 if ( epartIsReg(modrm
) ) {
17391 UInt rE
= eregOfRexRM(pfx
, modrm
);
17392 assign( srcVec
, getXMMReg(rE
) );
17393 assign( srcI64
, unop(Iop_V128to64
, mkexpr(srcVec
)) );
17395 DIP( "%spmov%cxdq %s,%s\n", mbV
, how
, nameXMMReg(rE
), nameXMMReg(rG
) );
17397 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17398 assign( srcI64
, loadLE(Ity_I64
, mkexpr(addr
)) );
17399 assign( srcVec
, unop( Iop_64UtoV128
, mkexpr(srcI64
)) );
17401 DIP( "%spmov%cxdq %s,%s\n", mbV
, how
, dis_buf
, nameXMMReg(rG
) );
17405 = xIsZ
/* do math for either zero or sign extend */
17406 ? binop( Iop_InterleaveLO32x4
,
17407 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) )
17408 : binop( Iop_64HLtoV128
,
17410 unop( Iop_64HIto32
, mkexpr(srcI64
) ) ),
17412 unop( Iop_64to32
, mkexpr(srcI64
) ) ) );
17414 (isAvx
? putYMMRegLoAndZU
: putXMMReg
) ( rG
, res
);
17420 /* Handles 256 bit versions of PMOVZXDQ and PMOVSXDQ. */
17421 static Long
dis_PMOVxXDQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17422 Long delta
, Bool xIsZ
)
17424 IRTemp addr
= IRTemp_INVALID
;
17427 IRTemp srcVec
= newTemp(Ity_V128
);
17428 UChar modrm
= getUChar(delta
);
17429 UChar how
= xIsZ
? 'z' : 's';
17430 UInt rG
= gregOfRexRM(pfx
, modrm
);
17431 /* Compute both srcI64 -- the value to expand -- and srcVec -- same
17432 thing in a V128, with arbitrary junk in the top 64 bits. Use
17433 one or both of them and let iropt clean up afterwards (as
17435 if ( epartIsReg(modrm
) ) {
17436 UInt rE
= eregOfRexRM(pfx
, modrm
);
17437 assign( srcVec
, getXMMReg(rE
) );
17439 DIP( "vpmov%cxdq %s,%s\n", how
, nameXMMReg(rE
), nameYMMReg(rG
) );
17441 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17442 assign( srcVec
, loadLE(Ity_V128
, mkexpr(addr
)) );
17444 DIP( "vpmov%cxdq %s,%s\n", how
, dis_buf
, nameYMMReg(rG
) );
17449 res
= binop( Iop_V128HLtoV256
,
17450 binop( Iop_InterleaveHI32x4
,
17451 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) ),
17452 binop( Iop_InterleaveLO32x4
,
17453 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) ) );
17455 IRTemp s3
, s2
, s1
, s0
;
17456 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
17457 breakupV128to32s( srcVec
, &s3
, &s2
, &s1
, &s0
);
17458 res
= binop( Iop_V128HLtoV256
,
17459 binop( Iop_64HLtoV128
,
17460 unop( Iop_32Sto64
, mkexpr(s3
) ),
17461 unop( Iop_32Sto64
, mkexpr(s2
) ) ),
17462 binop( Iop_64HLtoV128
,
17463 unop( Iop_32Sto64
, mkexpr(s1
) ),
17464 unop( Iop_32Sto64
, mkexpr(s0
) ) ) );
17467 putYMMReg ( rG
, res
);
17473 /* Handles 128 bit versions of PMOVZXBD and PMOVSXBD. */
17474 static Long
dis_PMOVxXBD_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17475 Long delta
, Bool isAvx
, Bool xIsZ
)
17477 IRTemp addr
= IRTemp_INVALID
;
17480 IRTemp srcVec
= newTemp(Ity_V128
);
17481 UChar modrm
= getUChar(delta
);
17482 const HChar
* mbV
= isAvx
? "v" : "";
17483 const HChar how
= xIsZ
? 'z' : 's';
17484 UInt rG
= gregOfRexRM(pfx
, modrm
);
17485 if ( epartIsReg(modrm
) ) {
17486 UInt rE
= eregOfRexRM(pfx
, modrm
);
17487 assign( srcVec
, getXMMReg(rE
) );
17489 DIP( "%spmov%cxbd %s,%s\n", mbV
, how
, nameXMMReg(rE
), nameXMMReg(rG
) );
17491 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17493 unop( Iop_32UtoV128
, loadLE( Ity_I32
, mkexpr(addr
) ) ) );
17495 DIP( "%spmov%cxbd %s,%s\n", mbV
, how
, dis_buf
, nameXMMReg(rG
) );
17498 IRTemp zeroVec
= newTemp(Ity_V128
);
17499 assign( zeroVec
, IRExpr_Const( IRConst_V128(0) ) );
17502 = binop(Iop_InterleaveLO8x16
,
17504 binop(Iop_InterleaveLO8x16
,
17505 mkexpr(zeroVec
), mkexpr(srcVec
)));
17507 res
= binop(Iop_SarN32x4
,
17508 binop(Iop_ShlN32x4
, res
, mkU8(24)), mkU8(24));
17510 (isAvx
? putYMMRegLoAndZU
: putXMMReg
) ( rG
, res
);
17516 /* Handles 256 bit versions of PMOVZXBD and PMOVSXBD. */
17517 static Long
dis_PMOVxXBD_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17518 Long delta
, Bool xIsZ
)
17520 IRTemp addr
= IRTemp_INVALID
;
17523 IRTemp srcVec
= newTemp(Ity_V128
);
17524 UChar modrm
= getUChar(delta
);
17525 UChar how
= xIsZ
? 'z' : 's';
17526 UInt rG
= gregOfRexRM(pfx
, modrm
);
17527 if ( epartIsReg(modrm
) ) {
17528 UInt rE
= eregOfRexRM(pfx
, modrm
);
17529 assign( srcVec
, getXMMReg(rE
) );
17531 DIP( "vpmov%cxbd %s,%s\n", how
, nameXMMReg(rE
), nameYMMReg(rG
) );
17533 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17535 unop( Iop_64UtoV128
, loadLE( Ity_I64
, mkexpr(addr
) ) ) );
17537 DIP( "vpmov%cxbd %s,%s\n", how
, dis_buf
, nameYMMReg(rG
) );
17540 IRTemp zeroVec
= newTemp(Ity_V128
);
17541 assign( zeroVec
, IRExpr_Const( IRConst_V128(0) ) );
17544 = binop( Iop_V128HLtoV256
,
17545 binop(Iop_InterleaveHI8x16
,
17547 binop(Iop_InterleaveLO8x16
,
17548 mkexpr(zeroVec
), mkexpr(srcVec
)) ),
17549 binop(Iop_InterleaveLO8x16
,
17551 binop(Iop_InterleaveLO8x16
,
17552 mkexpr(zeroVec
), mkexpr(srcVec
)) ) );
17554 res
= binop(Iop_SarN32x8
,
17555 binop(Iop_ShlN32x8
, res
, mkU8(24)), mkU8(24));
17557 putYMMReg ( rG
, res
);
17563 /* Handles 128 bit versions of PMOVSXBQ. */
17564 static Long
dis_PMOVSXBQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17565 Long delta
, Bool isAvx
)
17567 IRTemp addr
= IRTemp_INVALID
;
17570 IRTemp srcBytes
= newTemp(Ity_I16
);
17571 UChar modrm
= getUChar(delta
);
17572 const HChar
* mbV
= isAvx
? "v" : "";
17573 UInt rG
= gregOfRexRM(pfx
, modrm
);
17574 if ( epartIsReg(modrm
) ) {
17575 UInt rE
= eregOfRexRM(pfx
, modrm
);
17576 assign( srcBytes
, getXMMRegLane16( rE
, 0 ) );
17578 DIP( "%spmovsxbq %s,%s\n", mbV
, nameXMMReg(rE
), nameXMMReg(rG
) );
17580 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17581 assign( srcBytes
, loadLE( Ity_I16
, mkexpr(addr
) ) );
17583 DIP( "%spmovsxbq %s,%s\n", mbV
, dis_buf
, nameXMMReg(rG
) );
17586 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
17587 ( rG
, binop( Iop_64HLtoV128
,
17589 unop( Iop_16HIto8
, mkexpr(srcBytes
) ) ),
17591 unop( Iop_16to8
, mkexpr(srcBytes
) ) ) ) );
17596 /* Handles 256 bit versions of PMOVSXBQ. */
17597 static Long
dis_PMOVSXBQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17600 IRTemp addr
= IRTemp_INVALID
;
17603 IRTemp srcBytes
= newTemp(Ity_I32
);
17604 UChar modrm
= getUChar(delta
);
17605 UInt rG
= gregOfRexRM(pfx
, modrm
);
17606 if ( epartIsReg(modrm
) ) {
17607 UInt rE
= eregOfRexRM(pfx
, modrm
);
17608 assign( srcBytes
, getXMMRegLane32( rE
, 0 ) );
17610 DIP( "vpmovsxbq %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
) );
17612 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17613 assign( srcBytes
, loadLE( Ity_I32
, mkexpr(addr
) ) );
17615 DIP( "vpmovsxbq %s,%s\n", dis_buf
, nameYMMReg(rG
) );
17619 ( rG
, binop( Iop_V128HLtoV256
,
17620 binop( Iop_64HLtoV128
,
17623 unop( Iop_32HIto16
,
17624 mkexpr(srcBytes
) ) ) ),
17627 unop( Iop_32HIto16
,
17628 mkexpr(srcBytes
) ) ) ) ),
17629 binop( Iop_64HLtoV128
,
17633 mkexpr(srcBytes
) ) ) ),
17637 mkexpr(srcBytes
) ) ) ) ) ) );
17642 /* Handles 128 bit versions of PMOVZXBQ. */
17643 static Long
dis_PMOVZXBQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17644 Long delta
, Bool isAvx
)
17646 IRTemp addr
= IRTemp_INVALID
;
17649 IRTemp srcVec
= newTemp(Ity_V128
);
17650 UChar modrm
= getUChar(delta
);
17651 const HChar
* mbV
= isAvx
? "v" : "";
17652 UInt rG
= gregOfRexRM(pfx
, modrm
);
17653 if ( epartIsReg(modrm
) ) {
17654 UInt rE
= eregOfRexRM(pfx
, modrm
);
17655 assign( srcVec
, getXMMReg(rE
) );
17657 DIP( "%spmovzxbq %s,%s\n", mbV
, nameXMMReg(rE
), nameXMMReg(rG
) );
17659 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17661 unop( Iop_32UtoV128
,
17662 unop( Iop_16Uto32
, loadLE( Ity_I16
, mkexpr(addr
) ))));
17664 DIP( "%spmovzxbq %s,%s\n", mbV
, dis_buf
, nameXMMReg(rG
) );
17667 IRTemp zeroVec
= newTemp(Ity_V128
);
17668 assign( zeroVec
, IRExpr_Const( IRConst_V128(0) ) );
17670 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
17671 ( rG
, binop( Iop_InterleaveLO8x16
,
17673 binop( Iop_InterleaveLO8x16
,
17675 binop( Iop_InterleaveLO8x16
,
17676 mkexpr(zeroVec
), mkexpr(srcVec
) ) ) ) );
17681 /* Handles 256 bit versions of PMOVZXBQ. */
17682 static Long
dis_PMOVZXBQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17685 IRTemp addr
= IRTemp_INVALID
;
17688 IRTemp srcVec
= newTemp(Ity_V128
);
17689 UChar modrm
= getUChar(delta
);
17690 UInt rG
= gregOfRexRM(pfx
, modrm
);
17691 if ( epartIsReg(modrm
) ) {
17692 UInt rE
= eregOfRexRM(pfx
, modrm
);
17693 assign( srcVec
, getXMMReg(rE
) );
17695 DIP( "vpmovzxbq %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
) );
17697 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17699 unop( Iop_32UtoV128
, loadLE( Ity_I32
, mkexpr(addr
) )));
17701 DIP( "vpmovzxbq %s,%s\n", dis_buf
, nameYMMReg(rG
) );
17704 IRTemp zeroVec
= newTemp(Ity_V128
);
17705 assign( zeroVec
, IRExpr_Const( IRConst_V128(0) ) );
17708 ( rG
, binop( Iop_V128HLtoV256
,
17709 binop( Iop_InterleaveHI8x16
,
17711 binop( Iop_InterleaveLO8x16
,
17713 binop( Iop_InterleaveLO8x16
,
17714 mkexpr(zeroVec
), mkexpr(srcVec
) ) ) ),
17715 binop( Iop_InterleaveLO8x16
,
17717 binop( Iop_InterleaveLO8x16
,
17719 binop( Iop_InterleaveLO8x16
,
17720 mkexpr(zeroVec
), mkexpr(srcVec
) ) ) )
17726 static Long
dis_PHMINPOSUW_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17727 Long delta
, Bool isAvx
)
17729 IRTemp addr
= IRTemp_INVALID
;
17732 UChar modrm
= getUChar(delta
);
17733 const HChar
* mbV
= isAvx
? "v" : "";
17734 IRTemp sV
= newTemp(Ity_V128
);
17735 IRTemp sHi
= newTemp(Ity_I64
);
17736 IRTemp sLo
= newTemp(Ity_I64
);
17737 IRTemp dLo
= newTemp(Ity_I64
);
17738 UInt rG
= gregOfRexRM(pfx
,modrm
);
17739 if (epartIsReg(modrm
)) {
17740 UInt rE
= eregOfRexRM(pfx
,modrm
);
17741 assign( sV
, getXMMReg(rE
) );
17743 DIP("%sphminposuw %s,%s\n", mbV
, nameXMMReg(rE
), nameXMMReg(rG
));
17745 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17747 gen_SIGNAL_if_not_16_aligned(vbi
, addr
);
17748 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
17750 DIP("%sphminposuw %s,%s\n", mbV
, dis_buf
, nameXMMReg(rG
));
17752 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
17753 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
17754 assign( dLo
, mkIRExprCCall(
17755 Ity_I64
, 0/*regparms*/,
17756 "amd64g_calculate_sse_phminposuw",
17757 &amd64g_calculate_sse_phminposuw
,
17758 mkIRExprVec_2( mkexpr(sLo
), mkexpr(sHi
) )
17760 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
17761 (rG
, unop(Iop_64UtoV128
, mkexpr(dLo
)));
17766 static Long
dis_AESx ( const VexAbiInfo
* vbi
, Prefix pfx
,
17767 Long delta
, Bool isAvx
, UChar opc
)
17769 IRTemp addr
= IRTemp_INVALID
;
17772 UChar modrm
= getUChar(delta
);
17773 UInt rG
= gregOfRexRM(pfx
, modrm
);
17775 UInt regNoR
= (isAvx
&& opc
!= 0xDB) ? getVexNvvvv(pfx
) : rG
;
17777 /* This is a nasty kludge. We need to pass 2 x V128 to the
17778 helper. Since we can't do that, use a dirty
17779 helper to compute the results directly from the XMM regs in
17780 the guest state. That means for the memory case, we need to
17781 move the left operand into a pseudo-register (XMM16, let's
17783 if (epartIsReg(modrm
)) {
17784 regNoL
= eregOfRexRM(pfx
, modrm
);
17787 regNoL
= 16; /* use XMM16 as an intermediary */
17788 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17789 /* alignment check needed ???? */
17790 stmt( IRStmt_Put( OFFB_YMM16
, loadLE(Ity_V128
, mkexpr(addr
)) ));
17794 void* fn
= &amd64g_dirtyhelper_AES
;
17795 const HChar
* nm
= "amd64g_dirtyhelper_AES";
17797 /* Round up the arguments. Note that this is a kludge -- the
17798 use of mkU64 rather than mkIRExpr_HWord implies the
17799 assumption that the host's word size is 64-bit. */
17800 UInt gstOffD
= ymmGuestRegOffset(rG
);
17801 UInt gstOffL
= regNoL
== 16 ? OFFB_YMM16
: ymmGuestRegOffset(regNoL
);
17802 UInt gstOffR
= ymmGuestRegOffset(regNoR
);
17803 IRExpr
* opc4
= mkU64(opc
);
17804 IRExpr
* gstOffDe
= mkU64(gstOffD
);
17805 IRExpr
* gstOffLe
= mkU64(gstOffL
);
17806 IRExpr
* gstOffRe
= mkU64(gstOffR
);
17808 = mkIRExprVec_5( IRExpr_GSPTR(), opc4
, gstOffDe
, gstOffLe
, gstOffRe
);
17810 IRDirty
* d
= unsafeIRDirty_0_N( 0/*regparms*/, nm
, fn
, args
);
17811 /* It's not really a dirty call, but we can't use the clean helper
17812 mechanism here for the very lame reason that we can't pass 2 x
17813 V128s by value to a helper. Hence this roundabout scheme. */
17815 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
17816 /* AES{ENC,ENCLAST,DEC,DECLAST} read both registers, and writes
17817 the second for !isAvx or the third for isAvx.
17818 AESIMC (0xDB) reads the first register, and writes the second. */
17819 d
->fxState
[0].fx
= Ifx_Read
;
17820 d
->fxState
[0].offset
= gstOffL
;
17821 d
->fxState
[0].size
= sizeof(U128
);
17822 d
->fxState
[1].offset
= gstOffR
;
17823 d
->fxState
[1].size
= sizeof(U128
);
17825 d
->fxState
[1].fx
= Ifx_Write
;
17826 else if (!isAvx
|| rG
== regNoR
)
17827 d
->fxState
[1].fx
= Ifx_Modify
;
17829 d
->fxState
[1].fx
= Ifx_Read
;
17831 d
->fxState
[2].fx
= Ifx_Write
;
17832 d
->fxState
[2].offset
= gstOffD
;
17833 d
->fxState
[2].size
= sizeof(U128
);
17836 stmt( IRStmt_Dirty(d
) );
17838 const HChar
* opsuf
;
17840 case 0xDC: opsuf
= "enc"; break;
      case 0xDD: opsuf = "enclast"; break;
17842 case 0xDE: opsuf
= "dec"; break;
17843 case 0xDF: opsuf
= "declast"; break;
17844 case 0xDB: opsuf
= "imc"; break;
17845 default: vassert(0);
17847 DIP("%saes%s %s,%s%s%s\n", isAvx
? "v" : "", opsuf
,
17848 (regNoL
== 16 ? dis_buf
: nameXMMReg(regNoL
)),
17849 nameXMMReg(regNoR
),
17850 (isAvx
&& opc
!= 0xDB) ? "," : "",
17851 (isAvx
&& opc
!= 0xDB) ? nameXMMReg(rG
) : "");
17854 putYMMRegLane128( rG
, 1, mkV128(0) );
17858 static Long
dis_AESKEYGENASSIST ( const VexAbiInfo
* vbi
, Prefix pfx
,
17859 Long delta
, Bool isAvx
)
17861 IRTemp addr
= IRTemp_INVALID
;
17864 UChar modrm
= getUChar(delta
);
17866 UInt regNoR
= gregOfRexRM(pfx
, modrm
);
17869 /* This is a nasty kludge. See AESENC et al. instructions. */
17870 modrm
= getUChar(delta
);
17871 if (epartIsReg(modrm
)) {
17872 regNoL
= eregOfRexRM(pfx
, modrm
);
17873 imm
= getUChar(delta
+1);
17876 regNoL
= 16; /* use XMM16 as an intermediary */
17877 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
17878 /* alignment check ???? . */
17879 stmt( IRStmt_Put( OFFB_YMM16
, loadLE(Ity_V128
, mkexpr(addr
)) ));
17880 imm
= getUChar(delta
+alen
);
17884 /* Who ya gonna call? Presumably not Ghostbusters. */
17885 void* fn
= &amd64g_dirtyhelper_AESKEYGENASSIST
;
17886 const HChar
* nm
= "amd64g_dirtyhelper_AESKEYGENASSIST";
17888 /* Round up the arguments. Note that this is a kludge -- the
17889 use of mkU64 rather than mkIRExpr_HWord implies the
17890 assumption that the host's word size is 64-bit. */
17891 UInt gstOffL
= regNoL
== 16 ? OFFB_YMM16
: ymmGuestRegOffset(regNoL
);
17892 UInt gstOffR
= ymmGuestRegOffset(regNoR
);
17894 IRExpr
* imme
= mkU64(imm
& 0xFF);
17895 IRExpr
* gstOffLe
= mkU64(gstOffL
);
17896 IRExpr
* gstOffRe
= mkU64(gstOffR
);
17898 = mkIRExprVec_4( IRExpr_GSPTR(), imme
, gstOffLe
, gstOffRe
);
17900 IRDirty
* d
= unsafeIRDirty_0_N( 0/*regparms*/, nm
, fn
, args
);
17901 /* It's not really a dirty call, but we can't use the clean helper
17902 mechanism here for the very lame reason that we can't pass 2 x
17903 V128s by value to a helper. Hence this roundabout scheme. */
17905 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
17906 d
->fxState
[0].fx
= Ifx_Read
;
17907 d
->fxState
[0].offset
= gstOffL
;
17908 d
->fxState
[0].size
= sizeof(U128
);
17909 d
->fxState
[1].fx
= Ifx_Write
;
17910 d
->fxState
[1].offset
= gstOffR
;
17911 d
->fxState
[1].size
= sizeof(U128
);
17912 stmt( IRStmt_Dirty(d
) );
17914 DIP("%saeskeygenassist $%x,%s,%s\n", isAvx
? "v" : "", (UInt
)imm
,
17915 (regNoL
== 16 ? dis_buf
: nameXMMReg(regNoL
)),
17916 nameXMMReg(regNoR
));
17918 putYMMRegLane128( regNoR
, 1, mkV128(0) );
17923 __attribute__((noinline
))
17925 Long
dis_ESC_0F38__SSE4 ( Bool
* decode_OK
,
17926 const VexAbiInfo
* vbi
,
17927 Prefix pfx
, Int sz
, Long deltaIN
)
17929 IRTemp addr
= IRTemp_INVALID
;
17934 *decode_OK
= False
;
17936 Long delta
= deltaIN
;
17937 UChar opc
= getUChar(delta
);
17944 /* 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128 (byte gran)
17945 66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128 (float gran)
17946 66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128 (double gran)
17947 Blend at various granularities, with XMM0 (implicit operand)
17948 providing the controlling mask.
17950 if (have66noF2noF3(pfx
) && sz
== 2) {
17951 modrm
= getUChar(delta
);
17953 const HChar
* nm
= NULL
;
17955 IROp opSAR
= Iop_INVALID
;
17958 nm
= "pblendvb"; gran
= 1; opSAR
= Iop_SarN8x16
;
17961 nm
= "blendvps"; gran
= 4; opSAR
= Iop_SarN32x4
;
17964 nm
= "blendvpd"; gran
= 8; opSAR
= Iop_SarN64x2
;
17969 IRTemp vecE
= newTemp(Ity_V128
);
17970 IRTemp vecG
= newTemp(Ity_V128
);
17971 IRTemp vec0
= newTemp(Ity_V128
);
17973 if ( epartIsReg(modrm
) ) {
17974 assign(vecE
, getXMMReg(eregOfRexRM(pfx
, modrm
)));
17976 DIP( "%s %s,%s\n", nm
,
17977 nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
17978 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
17980 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17981 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
17982 assign(vecE
, loadLE( Ity_V128
, mkexpr(addr
) ));
17984 DIP( "%s %s,%s\n", nm
,
17985 dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
17988 assign(vecG
, getXMMReg(gregOfRexRM(pfx
, modrm
)));
17989 assign(vec0
, getXMMReg(0));
17991 IRTemp res
= math_PBLENDVB_128( vecE
, vecG
, vec0
, gran
, opSAR
);
17992 putXMMReg(gregOfRexRM(pfx
, modrm
), mkexpr(res
));
17994 goto decode_success
;
17999 /* 66 0F 38 17 /r = PTEST xmm1, xmm2/m128
18000 Logical compare (set ZF and CF from AND/ANDN of the operands) */
18001 if (have66noF2noF3(pfx
)
18002 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
18003 delta
= dis_xTESTy_128( vbi
, pfx
, delta
, False
/*!isAvx*/, 0 );
18004 goto decode_success
;
18009 /* 66 0F 38 20 /r = PMOVSXBW xmm1, xmm2/m64
18010 Packed Move with Sign Extend from Byte to Word (XMM) */
18011 if (have66noF2noF3(pfx
) && sz
== 2) {
18012 delta
= dis_PMOVxXBW_128( vbi
, pfx
, delta
,
18013 False
/*!isAvx*/, False
/*!xIsZ*/ );
18014 goto decode_success
;
18019 /* 66 0F 38 21 /r = PMOVSXBD xmm1, xmm2/m32
18020 Packed Move with Sign Extend from Byte to DWord (XMM) */
18021 if (have66noF2noF3(pfx
) && sz
== 2) {
18022 delta
= dis_PMOVxXBD_128( vbi
, pfx
, delta
,
18023 False
/*!isAvx*/, False
/*!xIsZ*/ );
18024 goto decode_success
;
18029 /* 66 0F 38 22 /r = PMOVSXBQ xmm1, xmm2/m16
18030 Packed Move with Sign Extend from Byte to QWord (XMM) */
18031 if (have66noF2noF3(pfx
) && sz
== 2) {
18032 delta
= dis_PMOVSXBQ_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
18033 goto decode_success
;
18038 /* 66 0F 38 23 /r = PMOVSXWD xmm1, xmm2/m64
18039 Packed Move with Sign Extend from Word to DWord (XMM) */
18040 if (have66noF2noF3(pfx
) && sz
== 2) {
18041 delta
= dis_PMOVxXWD_128(vbi
, pfx
, delta
,
18042 False
/*!isAvx*/, False
/*!xIsZ*/);
18043 goto decode_success
;
18048 /* 66 0F 38 24 /r = PMOVSXWQ xmm1, xmm2/m32
18049 Packed Move with Sign Extend from Word to QWord (XMM) */
18050 if (have66noF2noF3(pfx
) && sz
== 2) {
18051 delta
= dis_PMOVSXWQ_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
18052 goto decode_success
;
18057 /* 66 0F 38 25 /r = PMOVSXDQ xmm1, xmm2/m64
18058 Packed Move with Sign Extend from Double Word to Quad Word (XMM) */
18059 if (have66noF2noF3(pfx
) && sz
== 2) {
18060 delta
= dis_PMOVxXDQ_128( vbi
, pfx
, delta
,
18061 False
/*!isAvx*/, False
/*!xIsZ*/ );
18062 goto decode_success
;
18067 /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-lanes
18068 0 x 0 to form lower 64-bit half and lanes 2 x 2 to form upper
18070 /* This is a really poor translation -- could be improved if
18071 performance critical. It's a copy-paste of PMULUDQ, too. */
18072 if (have66noF2noF3(pfx
) && sz
== 2) {
18073 IRTemp sV
= newTemp(Ity_V128
);
18074 IRTemp dV
= newTemp(Ity_V128
);
18075 modrm
= getUChar(delta
);
18076 UInt rG
= gregOfRexRM(pfx
,modrm
);
18077 assign( dV
, getXMMReg(rG
) );
18078 if (epartIsReg(modrm
)) {
18079 UInt rE
= eregOfRexRM(pfx
,modrm
);
18080 assign( sV
, getXMMReg(rE
) );
18082 DIP("pmuldq %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
18084 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
18085 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
18087 DIP("pmuldq %s,%s\n", dis_buf
, nameXMMReg(rG
));
18090 putXMMReg( rG
, mkexpr(math_PMULDQ_128( dV
, sV
)) );
18091 goto decode_success
;
18096 /* 66 0F 38 29 = PCMPEQQ
18097 64x2 equality comparison */
18098 if (have66noF2noF3(pfx
) && sz
== 2) {
18099 /* FIXME: this needs an alignment check */
18100 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
18101 "pcmpeqq", Iop_CmpEQ64x2
, False
);
18102 goto decode_success
;
18107 /* 66 0F 38 2A = MOVNTDQA
18108 "non-temporal" "streaming" load
18109 Handle like MOVDQA but only memory operand is allowed */
18110 if (have66noF2noF3(pfx
) && sz
== 2) {
18111 modrm
= getUChar(delta
);
18112 if (!epartIsReg(modrm
)) {
18113 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
18114 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
18115 putXMMReg( gregOfRexRM(pfx
,modrm
),
18116 loadLE(Ity_V128
, mkexpr(addr
)) );
18117 DIP("movntdqa %s,%s\n", dis_buf
,
18118 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
18120 goto decode_success
;
18126 /* 66 0f 38 2B /r = PACKUSDW xmm1, xmm2/m128
18127 2x 32x4 S->U saturating narrow from xmm2/m128 to xmm1 */
18128 if (have66noF2noF3(pfx
) && sz
== 2) {
18130 modrm
= getUChar(delta
);
18132 IRTemp argL
= newTemp(Ity_V128
);
18133 IRTemp argR
= newTemp(Ity_V128
);
18135 if ( epartIsReg(modrm
) ) {
18136 assign( argL
, getXMMReg( eregOfRexRM(pfx
, modrm
) ) );
18138 DIP( "packusdw %s,%s\n",
18139 nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
18140 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
18142 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
18143 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
18144 assign( argL
, loadLE( Ity_V128
, mkexpr(addr
) ));
18146 DIP( "packusdw %s,%s\n",
18147 dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
18150 assign(argR
, getXMMReg( gregOfRexRM(pfx
, modrm
) ));
18152 putXMMReg( gregOfRexRM(pfx
, modrm
),
18153 binop( Iop_QNarrowBin32Sto16Ux8
,
18154 mkexpr(argL
), mkexpr(argR
)) );
18156 goto decode_success
;
18161 /* 66 0F 38 30 /r = PMOVZXBW xmm1, xmm2/m64
18162 Packed Move with Zero Extend from Byte to Word (XMM) */
18163 if (have66noF2noF3(pfx
) && sz
== 2) {
18164 delta
= dis_PMOVxXBW_128( vbi
, pfx
, delta
,
18165 False
/*!isAvx*/, True
/*xIsZ*/ );
18166 goto decode_success
;
18171 /* 66 0F 38 31 /r = PMOVZXBD xmm1, xmm2/m32
18172 Packed Move with Zero Extend from Byte to DWord (XMM) */
18173 if (have66noF2noF3(pfx
) && sz
== 2) {
18174 delta
= dis_PMOVxXBD_128( vbi
, pfx
, delta
,
18175 False
/*!isAvx*/, True
/*xIsZ*/ );
18176 goto decode_success
;
18181 /* 66 0F 38 32 /r = PMOVZXBQ xmm1, xmm2/m16
18182 Packed Move with Zero Extend from Byte to QWord (XMM) */
18183 if (have66noF2noF3(pfx
) && sz
== 2) {
18184 delta
= dis_PMOVZXBQ_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
18185 goto decode_success
;
18190 /* 66 0F 38 33 /r = PMOVZXWD xmm1, xmm2/m64
18191 Packed Move with Zero Extend from Word to DWord (XMM) */
18192 if (have66noF2noF3(pfx
) && sz
== 2) {
18193 delta
= dis_PMOVxXWD_128( vbi
, pfx
, delta
,
18194 False
/*!isAvx*/, True
/*xIsZ*/ );
18195 goto decode_success
;
18200 /* 66 0F 38 34 /r = PMOVZXWQ xmm1, xmm2/m32
18201 Packed Move with Zero Extend from Word to QWord (XMM) */
18202 if (have66noF2noF3(pfx
) && sz
== 2) {
18203 delta
= dis_PMOVZXWQ_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
18204 goto decode_success
;
18209 /* 66 0F 38 35 /r = PMOVZXDQ xmm1, xmm2/m64
18210 Packed Move with Zero Extend from DWord to QWord (XMM) */
18211 if (have66noF2noF3(pfx
) && sz
== 2) {
18212 delta
= dis_PMOVxXDQ_128( vbi
, pfx
, delta
,
18213 False
/*!isAvx*/, True
/*xIsZ*/ );
18214 goto decode_success
;
18219 /* 66 0F 38 37 = PCMPGTQ
18220 64x2 comparison (signed, presumably; the Intel docs don't say :-)
18222 if (have66noF2noF3(pfx
) && sz
== 2) {
18223 /* FIXME: this needs an alignment check */
18224 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
18225 "pcmpgtq", Iop_CmpGT64Sx2
, False
);
18226 goto decode_success
;
18232 /* 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128 8Sx16 (signed) min
18233 66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128 8Sx16 (signed) max
18235 if (have66noF2noF3(pfx
) && sz
== 2) {
18236 /* FIXME: this needs an alignment check */
18237 Bool isMAX
= opc
== 0x3C;
18238 delta
= dis_SSEint_E_to_G(
18240 isMAX
? "pmaxsb" : "pminsb",
18241 isMAX
? Iop_Max8Sx16
: Iop_Min8Sx16
,
18244 goto decode_success
;
18250 /* 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128
18251 Minimum of Packed Signed Double Word Integers (XMM)
18252 66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128
18253 Maximum of Packed Signed Double Word Integers (XMM)
18255 if (have66noF2noF3(pfx
) && sz
== 2) {
18256 /* FIXME: this needs an alignment check */
18257 Bool isMAX
= opc
== 0x3D;
18258 delta
= dis_SSEint_E_to_G(
18260 isMAX
? "pmaxsd" : "pminsd",
18261 isMAX
? Iop_Max32Sx4
: Iop_Min32Sx4
,
18264 goto decode_success
;
18270 /* 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128
18271 Minimum of Packed Unsigned Word Integers (XMM)
18272 66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128
18273 Maximum of Packed Unsigned Word Integers (XMM)
18275 if (have66noF2noF3(pfx
) && sz
== 2) {
18276 /* FIXME: this needs an alignment check */
18277 Bool isMAX
= opc
== 0x3E;
18278 delta
= dis_SSEint_E_to_G(
18280 isMAX
? "pmaxuw" : "pminuw",
18281 isMAX
? Iop_Max16Ux8
: Iop_Min16Ux8
,
18284 goto decode_success
;
18290 /* 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128
18291 Minimum of Packed Unsigned Doubleword Integers (XMM)
18292 66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128
18293 Maximum of Packed Unsigned Doubleword Integers (XMM)
18295 if (have66noF2noF3(pfx
) && sz
== 2) {
18296 /* FIXME: this needs an alignment check */
18297 Bool isMAX
= opc
== 0x3F;
18298 delta
= dis_SSEint_E_to_G(
18300 isMAX
? "pmaxud" : "pminud",
18301 isMAX
? Iop_Max32Ux4
: Iop_Min32Ux4
,
18304 goto decode_success
;
18309 /* 66 0F 38 40 /r = PMULLD xmm1, xmm2/m128
18310 32x4 integer multiply from xmm2/m128 to xmm1 */
18311 if (have66noF2noF3(pfx
) && sz
== 2) {
18313 modrm
= getUChar(delta
);
18315 IRTemp argL
= newTemp(Ity_V128
);
18316 IRTemp argR
= newTemp(Ity_V128
);
18318 if ( epartIsReg(modrm
) ) {
18319 assign( argL
, getXMMReg( eregOfRexRM(pfx
, modrm
) ) );
18321 DIP( "pmulld %s,%s\n",
18322 nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
18323 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
18325 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
18326 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
18327 assign( argL
, loadLE( Ity_V128
, mkexpr(addr
) ));
18329 DIP( "pmulld %s,%s\n",
18330 dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
18333 assign(argR
, getXMMReg( gregOfRexRM(pfx
, modrm
) ));
18335 putXMMReg( gregOfRexRM(pfx
, modrm
),
18336 binop( Iop_Mul32x4
, mkexpr(argL
), mkexpr(argR
)) );
18338 goto decode_success
;
18343 /* 66 0F 38 41 /r = PHMINPOSUW xmm1, xmm2/m128
18344 Packed Horizontal Word Minimum from xmm2/m128 to xmm1 */
18345 if (have66noF2noF3(pfx
) && sz
== 2) {
18346 delta
= dis_PHMINPOSUW_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
18347 goto decode_success
;
18356 /* 66 0F 38 DC /r = AESENC xmm1, xmm2/m128
18357 DD /r = AESENCLAST xmm1, xmm2/m128
18358 DE /r = AESDEC xmm1, xmm2/m128
18359 DF /r = AESDECLAST xmm1, xmm2/m128
18361 DB /r = AESIMC xmm1, xmm2/m128 */
18362 if (have66noF2noF3(pfx
) && sz
== 2) {
18363 delta
= dis_AESx( vbi
, pfx
, delta
, False
/*!isAvx*/, opc
);
18364 goto decode_success
;
18370 /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok)
18371 F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32
18372 The decoding on this is a bit unusual.
18374 if (haveF2noF3(pfx
)
18375 && (opc
== 0xF1 || (opc
== 0xF0 && !have66(pfx
)))) {
18376 modrm
= getUChar(delta
);
18381 vassert(sz
== 2 || sz
== 4 || sz
== 8);
18383 IRType tyE
= szToITy(sz
);
18384 IRTemp valE
= newTemp(tyE
);
18386 if (epartIsReg(modrm
)) {
18387 assign(valE
, getIRegE(sz
, pfx
, modrm
));
18389 DIP("crc32b %s,%s\n", nameIRegE(sz
, pfx
, modrm
),
18390 nameIRegG(1==getRexW(pfx
) ? 8 : 4, pfx
, modrm
));
18392 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
18393 assign(valE
, loadLE(tyE
, mkexpr(addr
)));
18395 DIP("crc32b %s,%s\n", dis_buf
,
18396 nameIRegG(1==getRexW(pfx
) ? 8 : 4, pfx
, modrm
));
18399 /* Somewhat funny getting/putting of the crc32 value, in order
18400 to ensure that it turns into 64-bit gets and puts. However,
18401 mask off the upper 32 bits so as to not get memcheck false
18402 +ves around the helper call. */
18403 IRTemp valG0
= newTemp(Ity_I64
);
18404 assign(valG0
, binop(Iop_And64
, getIRegG(8, pfx
, modrm
),
18405 mkU64(0xFFFFFFFF)));
18407 const HChar
* nm
= NULL
;
18410 case 1: nm
= "amd64g_calc_crc32b";
18411 fn
= &amd64g_calc_crc32b
; break;
18412 case 2: nm
= "amd64g_calc_crc32w";
18413 fn
= &amd64g_calc_crc32w
; break;
18414 case 4: nm
= "amd64g_calc_crc32l";
18415 fn
= &amd64g_calc_crc32l
; break;
18416 case 8: nm
= "amd64g_calc_crc32q";
18417 fn
= &amd64g_calc_crc32q
; break;
18420 IRTemp valG1
= newTemp(Ity_I64
);
18422 mkIRExprCCall(Ity_I64
, 0/*regparm*/, nm
, fn
,
18423 mkIRExprVec_2(mkexpr(valG0
),
18424 widenUto64(mkexpr(valE
)))));
18426 putIRegG(4, pfx
, modrm
, unop(Iop_64to32
, mkexpr(valG1
)));
18427 goto decode_success
;
18437 *decode_OK
= False
;
18446 /*------------------------------------------------------------*/
18448 /*--- Top-level SSE4: dis_ESC_0F3A__SSE4 ---*/
18450 /*------------------------------------------------------------*/
18452 static Long
dis_PEXTRW ( const VexAbiInfo
* vbi
, Prefix pfx
,
18453 Long delta
, Bool isAvx
)
18455 IRTemp addr
= IRTemp_INVALID
;
18456 IRTemp t0
= IRTemp_INVALID
;
18457 IRTemp t1
= IRTemp_INVALID
;
18458 IRTemp t2
= IRTemp_INVALID
;
18459 IRTemp t3
= IRTemp_INVALID
;
18460 UChar modrm
= getUChar(delta
);
18463 UInt rG
= gregOfRexRM(pfx
,modrm
);
18465 IRTemp xmm_vec
= newTemp(Ity_V128
);
18466 IRTemp d16
= newTemp(Ity_I16
);
18467 const HChar
* mbV
= isAvx
? "v" : "";
18469 vassert(0==getRexW(pfx
)); /* ensured by caller */
18470 assign( xmm_vec
, getXMMReg(rG
) );
18471 breakupV128to32s( xmm_vec
, &t3
, &t2
, &t1
, &t0
);
18473 if ( epartIsReg( modrm
) ) {
18474 imm8_20
= (Int
)(getUChar(delta
+1) & 7);
18476 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
18477 imm8_20
= (Int
)(getUChar(delta
+alen
) & 7);
18481 case 0: assign(d16
, unop(Iop_32to16
, mkexpr(t0
))); break;
18482 case 1: assign(d16
, unop(Iop_32HIto16
, mkexpr(t0
))); break;
18483 case 2: assign(d16
, unop(Iop_32to16
, mkexpr(t1
))); break;
18484 case 3: assign(d16
, unop(Iop_32HIto16
, mkexpr(t1
))); break;
18485 case 4: assign(d16
, unop(Iop_32to16
, mkexpr(t2
))); break;
18486 case 5: assign(d16
, unop(Iop_32HIto16
, mkexpr(t2
))); break;
18487 case 6: assign(d16
, unop(Iop_32to16
, mkexpr(t3
))); break;
18488 case 7: assign(d16
, unop(Iop_32HIto16
, mkexpr(t3
))); break;
18489 default: vassert(0);
18492 if ( epartIsReg( modrm
) ) {
18493 UInt rE
= eregOfRexRM(pfx
,modrm
);
18494 putIReg32( rE
, unop(Iop_16Uto32
, mkexpr(d16
)) );
18496 DIP( "%spextrw $%d, %s,%s\n", mbV
, imm8_20
,
18497 nameXMMReg( rG
), nameIReg32( rE
) );
18499 storeLE( mkexpr(addr
), mkexpr(d16
) );
18501 DIP( "%spextrw $%d, %s,%s\n", mbV
, imm8_20
, nameXMMReg( rG
), dis_buf
);
18507 static Long
dis_PEXTRD ( const VexAbiInfo
* vbi
, Prefix pfx
,
18508 Long delta
, Bool isAvx
)
18510 IRTemp addr
= IRTemp_INVALID
;
18511 IRTemp t0
= IRTemp_INVALID
;
18512 IRTemp t1
= IRTemp_INVALID
;
18513 IRTemp t2
= IRTemp_INVALID
;
18514 IRTemp t3
= IRTemp_INVALID
;
18520 IRTemp xmm_vec
= newTemp(Ity_V128
);
18521 IRTemp src_dword
= newTemp(Ity_I32
);
18522 const HChar
* mbV
= isAvx
? "v" : "";
18524 vassert(0==getRexW(pfx
)); /* ensured by caller */
18525 modrm
= getUChar(delta
);
18526 assign( xmm_vec
, getXMMReg( gregOfRexRM(pfx
,modrm
) ) );
18527 breakupV128to32s( xmm_vec
, &t3
, &t2
, &t1
, &t0
);
18529 if ( epartIsReg( modrm
) ) {
18530 imm8_10
= (Int
)(getUChar(delta
+1) & 3);
18532 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
18533 imm8_10
= (Int
)(getUChar(delta
+alen
) & 3);
18536 switch ( imm8_10
) {
18537 case 0: assign( src_dword
, mkexpr(t0
) ); break;
18538 case 1: assign( src_dword
, mkexpr(t1
) ); break;
18539 case 2: assign( src_dword
, mkexpr(t2
) ); break;
18540 case 3: assign( src_dword
, mkexpr(t3
) ); break;
18541 default: vassert(0);
18544 if ( epartIsReg( modrm
) ) {
18545 putIReg32( eregOfRexRM(pfx
,modrm
), mkexpr(src_dword
) );
18547 DIP( "%spextrd $%d, %s,%s\n", mbV
, imm8_10
,
18548 nameXMMReg( gregOfRexRM(pfx
, modrm
) ),
18549 nameIReg32( eregOfRexRM(pfx
, modrm
) ) );
18551 storeLE( mkexpr(addr
), mkexpr(src_dword
) );
18553 DIP( "%spextrd $%d, %s,%s\n", mbV
,
18554 imm8_10
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ), dis_buf
);
static Long dis_PEXTRQ ( const VexAbiInfo* vbi, Prefix pfx,
                         Long delta, Bool isAvx )
{
   IRTemp addr = IRTemp_INVALID;

   IRTemp xmm_vec   = newTemp(Ity_V128);
   IRTemp src_qword = newTemp(Ity_I64);
   const HChar* mbV = isAvx ? "v" : "";

   vassert(1==getRexW(pfx)); /* ensured by caller */
   modrm = getUChar(delta);
   assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );

   if ( epartIsReg( modrm ) ) {
      imm8_0 = (Int)(getUChar(delta+1) & 1);
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8_0 = (Int)(getUChar(delta+alen) & 1);
   }

   switch ( imm8_0 ) {
      case 0: assign( src_qword, unop(Iop_V128to64,   mkexpr(xmm_vec)) );
              break;
      case 1: assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) );
              break;
      default: vassert(0);
   }

   if ( epartIsReg( modrm ) ) {
      putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) );
      DIP( "%spextrq $%d, %s,%s\n", mbV, imm8_0,
           nameXMMReg( gregOfRexRM(pfx, modrm) ),
           nameIReg64( eregOfRexRM(pfx, modrm) ) );
   } else {
      storeLE( mkexpr(addr), mkexpr(src_qword) );
      DIP( "%spextrq $%d, %s,%s\n", mbV,
           imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
   }
static IRExpr* math_CTZ32(IRExpr *exp)
{
   /* Iop_Ctz32 isn't implemented by the amd64 back end, so use Iop_Ctz64. */
   return unop(Iop_64to32, unop(Iop_Ctz64, unop(Iop_32Uto64, exp)));
}
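/* For example, math_CTZ32(mkU32(0x10000)) evaluates to 16.  The
   PCMPISTRI handling below relies on exactly this: it ORs 0x10000 into
   the intermediate result before counting, so a "no match" outcome
   yields the architecturally required ECX value of 16 and the count
   never sees an all-zero input. */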
static Long dis_PCMPISTRI_3A ( UChar modrm, UInt regNoL, UInt regNoR,
                               Long delta, UChar opc, UChar imm,
                               const HChar* dis_buf )
{
   /* We only handle PCMPISTRI for now */
   vassert((opc & 0x03) == 0x03);
   /* And only an immediate byte of 0x38 or 0x3A */
   vassert((imm & ~0x02) == 0x38);

   /* FIXME: Is this correct when RegNoL == 16 ? */
   IRTemp argL = newTemp(Ity_V128);
   assign(argL, getXMMReg(regNoL));
   IRTemp argR = newTemp(Ity_V128);
   assign(argR, getXMMReg(regNoR));

   IRTemp zmaskL = newTemp(Ity_I32);
   assign(zmaskL, unop(Iop_16Uto32,
                       unop(Iop_GetMSBs8x16,
                            binop(Iop_CmpEQ8x16, mkexpr(argL), mkV128(0)))));
   IRTemp zmaskR = newTemp(Ity_I32);
   assign(zmaskR, unop(Iop_16Uto32,
                       unop(Iop_GetMSBs8x16,
                            binop(Iop_CmpEQ8x16, mkexpr(argR), mkV128(0)))));

   /* We want validL = ~(zmaskL | -zmaskL)

      But this formulation kills memcheck's validity tracking when any
      bits above the first "1" are invalid.  So reformulate as:

      validL = (zmaskL ? (1 << ctz(zmaskL)) : 0) - 1
   */
   IRExpr *ctzL = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskL)));

   /* Generate a bool expression which is zero iff the original is
      zero.  Do this carefully so memcheck can propagate validity
      bits. */
   IRTemp zmaskL_zero = newTemp(Ity_I1);
   assign(zmaskL_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskL), mkU32(0)));

   IRTemp validL = newTemp(Ity_I32);
   assign(validL, binop(Iop_Sub32,
                        IRExpr_ITE(mkexpr(zmaskL_zero),
                                   binop(Iop_Shl32, mkU32(1), ctzL),
                                   mkU32(0)),
                        mkU32(1)));
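   /* Worked example of the reformulation above (illustration only): if
      argL's first zero byte is at position 4, then zmaskL = 0x0010,
      ctz(zmaskL) = 4, and validL = (1 << 4) - 1 = 0x000F -- the same
      value that ~(zmaskL | -zmaskL) = ~0xFFFFFFF0 would give, but
      computed without depending on the bits above the first set bit,
      which Memcheck may regard as undefined when the bytes past the
      string terminator are uninitialised. */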
   /* And similarly for validR. */
   IRExpr *ctzR = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskR)));
   IRTemp zmaskR_zero = newTemp(Ity_I1);
   assign(zmaskR_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskR), mkU32(0)));
   IRTemp validR = newTemp(Ity_I32);
   assign(validR, binop(Iop_Sub32,
                        IRExpr_ITE(mkexpr(zmaskR_zero),
                                   binop(Iop_Shl32, mkU32(1), ctzR),
                                   mkU32(0)),
                        mkU32(1)));

   /* Do the actual comparison. */
   IRExpr *boolResII = unop(Iop_16Uto32,
                            unop(Iop_GetMSBs8x16,
                                 binop(Iop_CmpEQ8x16, mkexpr(argL),
                                                      mkexpr(argR))));

   /* Compute boolResII & validL & validR (i.e., if both valid, use
      comparison result) */
   IRExpr *intRes1_a = binop(Iop_And32, boolResII,
                             binop(Iop_And32,
                                   mkexpr(validL), mkexpr(validR)));

   /* Compute ~(validL | validR); i.e., if both invalid, force 1. */
   IRExpr *intRes1_b = unop(Iop_Not32, binop(Iop_Or32,
                                             mkexpr(validL), mkexpr(validR)));
   /* Otherwise, zero. */
   IRExpr *intRes1 = binop(Iop_And32, mkU32(0xFFFF),
                           binop(Iop_Or32, intRes1_a, intRes1_b));

   /* The "0x30" in imm=0x3A means "polarity=3", meaning XOR validL
      with the result. */
   IRTemp intRes2 = newTemp(Ity_I32);
   assign(intRes2, binop(Iop_And32, mkU32(0xFFFF),
                         binop(Iop_Xor32, intRes1, mkexpr(validL))));

   /* If the 0x40 bit were set in imm=0x3A, we would return the index
      of the msb.  Since it is clear, we return the index of the
      lsb. */
   IRExpr *newECX = math_CTZ32(binop(Iop_Or32,
                                     mkexpr(intRes2), mkU32(0x10000)));

   /* And that's our rcx. */
   putIReg32(R_RCX, newECX);

   /* Now for the condition codes... */

   /* C == 0 iff intRes2 == 0 */
   IRExpr *c_bit = IRExpr_ITE( binop(Iop_ExpCmpNE32, mkexpr(intRes2),
                                     mkU32(0)),
                               mkU32(1 << AMD64G_CC_SHIFT_C),
                               mkU32(0));
   /* Z == 1 iff any in argL is 0 */
   IRExpr *z_bit = IRExpr_ITE( mkexpr(zmaskL_zero),
                               mkU32(1 << AMD64G_CC_SHIFT_Z),
                               mkU32(0));
   /* S == 1 iff any in argR is 0 */
   IRExpr *s_bit = IRExpr_ITE( mkexpr(zmaskR_zero),
                               mkU32(1 << AMD64G_CC_SHIFT_S),
                               mkU32(0));
   /* O == IntRes2[0] */
   IRExpr *o_bit = binop(Iop_Shl32, binop(Iop_And32, mkexpr(intRes2),
                                          mkU32(1)),
                         mkU8(AMD64G_CC_SHIFT_O));

   /* Put them all together */
   IRTemp cc = newTemp(Ity_I64);
   assign(cc, widenUto64(binop(Iop_Or32,
                               binop(Iop_Or32, c_bit, z_bit),
                               binop(Iop_Or32, s_bit, o_bit))));
   stmt(IRStmt_Put(OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY)));
   stmt(IRStmt_Put(OFFB_CC_DEP1, mkexpr(cc)));
   stmt(IRStmt_Put(OFFB_CC_DEP2, mkU64(0)));
   stmt(IRStmt_Put(OFFB_CC_NDEP, mkU64(0)));
/* This can fail, in which case it returns the original (unchanged)
   delta. */
static Long dis_PCMPxSTRx ( const VexAbiInfo* vbi, Prefix pfx,
                            Long delta, Bool isAvx, UChar opc )
{
   Long   delta0  = delta;
   UInt   isISTRx = opc & 2;
   UInt   isxSTRM = (opc & 1) ^ 1;
   IRTemp addr    = IRTemp_INVALID;

   /* This is a nasty kludge.  We need to pass 2 x V128 to the helper
      (which is clean).  Since we can't do that, use a dirty helper to
      compute the results directly from the XMM regs in the guest
      state.  That means for the memory case, we need to move the left
      operand into a pseudo-register (XMM16, let's call it). */
   UChar modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      regNoL = eregOfRexRM(pfx, modrm);
      regNoR = gregOfRexRM(pfx, modrm);
      imm = getUChar(delta+1);
   } else {
      regNoL = 16; /* use XMM16 as an intermediary */
      regNoR = gregOfRexRM(pfx, modrm);
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      /* No alignment check; I guess that makes sense, given that
         these insns are for dealing with C style strings. */
      stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
      imm = getUChar(delta+alen);
   }

   /* Print the insn here, since dis_PCMPISTRI_3A doesn't do so. */
   if (regNoL == 16) {
      DIP("%spcmp%cstr%c $%x,%s,%s\n",
          isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
          (UInt)imm, dis_buf, nameXMMReg(regNoR));
   } else {
      DIP("%spcmp%cstr%c $%x,%s,%s\n",
          isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
          (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR));
   }

   /* Handle special case(s). */
   if (imm == 0x3A && isISTRx && !isxSTRM) {
      return dis_PCMPISTRI_3A ( modrm, regNoL, regNoR, delta,
                                opc, imm, dis_buf );
   }

   /* Now we know the XMM reg numbers for the operands, and the
      immediate byte.  Is it one we can actually handle?  Throw out any
      cases for which the helper function has not been verified. */
   switch (imm) {
      case 0x00: case 0x02:
      case 0x08: case 0x0A: case 0x0C: case 0x0E:
      case 0x10: case 0x12: case 0x14:
      case 0x18: case 0x1A:
      case 0x30: case 0x34:
      case 0x38: case 0x3A:
      case 0x40: case 0x42: case 0x44: case 0x46:
      case 0x70: case 0x72:
      // the 16-bit character versions of the above
      case 0x01: case 0x03:
      case 0x09: case 0x0B: case 0x0D:
      case 0x19: case 0x1B:
      case 0x39: case 0x3B:
      case 0x41: case 0x45:
         break;
      default:
         return delta0; /*FAIL*/
   }

   /* Who ya gonna call?  Presumably not Ghostbusters. */
   void*        fn = &amd64g_dirtyhelper_PCMPxSTRx;
   const HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx";

   /* Round up the arguments.  Note that this is a kludge -- the use
      of mkU64 rather than mkIRExpr_HWord implies the assumption that
      the host's word size is 64-bit. */
   UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
   UInt gstOffR = ymmGuestRegOffset(regNoR);

   IRExpr* opc4_and_imm = mkU64((opc << 8) | (imm & 0xFF));
   IRExpr* gstOffLe     = mkU64(gstOffL);
   IRExpr* gstOffRe     = mkU64(gstOffR);
   IRExpr* edxIN        = isISTRx ? mkU64(0) : getIRegRDX(8);
   IRExpr* eaxIN        = isISTRx ? mkU64(0) : getIRegRAX(8);

   IRExpr** args
      = mkIRExprVec_6( IRExpr_GSPTR(),
                       opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN );

   IRTemp   resT = newTemp(Ity_I64);
   IRDirty* d    = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args );
   /* It's not really a dirty call, but we can't use the clean helper
      mechanism here for the very lame reason that we can't pass 2 x
      V128s by value to a helper.  Hence this roundabout scheme. */
   vex_bzero(&d->fxState, sizeof(d->fxState));
   d->fxState[0].fx     = Ifx_Read;
   d->fxState[0].offset = gstOffL;
   d->fxState[0].size   = sizeof(U128);
   d->fxState[1].fx     = Ifx_Read;
   d->fxState[1].offset = gstOffR;
   d->fxState[1].size   = sizeof(U128);

   /* Declare that the helper writes XMM0. */
   d->fxState[2].fx     = Ifx_Write;
   d->fxState[2].offset = ymmGuestRegOffset(0);
   d->fxState[2].size   = sizeof(U128);

   stmt( IRStmt_Dirty(d) );

   /* Now resT[15:0] holds the new OSZACP values, so the condition
      codes must be updated.  And for a xSTRI case, resT[31:16] holds
      the new ECX value, so stash that too. */
   if (!isxSTRM) {
      putIReg64(R_RCX, binop(Iop_And64,
                             binop(Iop_Shr64, mkexpr(resT), mkU8(16)),
                             mkU64(0xFFFF)));
   }

   /* Zap the upper half of the dest reg as per AVX conventions. */
   if (isxSTRM && isAvx)
      putYMMRegLane128(/*YMM*/0, 1, mkV128(0));

   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF)) ));
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
static IRTemp math_PINSRB_128 ( IRTemp v128, IRTemp u8, UInt imm8 )
{
   vassert(imm8 <= 15);

   // Create a V128 value which has the selected byte in the
   // specified lane, and zeroes everywhere else.
   IRTemp tmp128    = newTemp(Ity_V128);
   IRTemp halfshift = newTemp(Ity_I64);
   assign(halfshift, binop(Iop_Shl64,
                           unop(Iop_8Uto64, mkexpr(u8)),
                           mkU8(8 * (imm8 & 7))));
   if (imm8 < 8) {
      assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
   } else {
      assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
   }

   UShort mask = ~(1 << imm8);
   IRTemp res  = newTemp(Ity_V128);
   assign( res, binop(Iop_OrV128,
                      mkexpr(tmp128),
                      binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
   return res;
}
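/* Illustration: mkV128's 16-bit argument acts as a per-byte mask, one
   bit per byte lane.  For imm8 == 5, mask becomes ~(1 << 5) == 0xFFDF,
   so the AndV128 keeps every byte of v128 except lane 5, and the
   OrV128 then drops the shifted-in byte into exactly that lane. */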
static IRTemp math_PINSRD_128 ( IRTemp v128, IRTemp u32, UInt imm8 )
{
   IRTemp z32 = newTemp(Ity_I32);
   assign(z32, mkU32(0));

   /* Surround u32 with zeroes as per imm, giving us something we can
      OR into a suitably masked-out v128. */
   IRTemp withZs = newTemp(Ity_V128);
   UShort mask = 0;
   switch (imm8) {
      case 3: mask = 0x0FFF;
              assign(withZs, mkV128from32s(u32, z32, z32, z32));
              break;
      case 2: mask = 0xF0FF;
              assign(withZs, mkV128from32s(z32, u32, z32, z32));
              break;
      case 1: mask = 0xFF0F;
              assign(withZs, mkV128from32s(z32, z32, u32, z32));
              break;
      case 0: mask = 0xFFF0;
              assign(withZs, mkV128from32s(z32, z32, z32, u32));
              break;
      default: vassert(0);
   }

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop( Iop_OrV128,
                      mkexpr(withZs),
                      binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
   return res;
}
static IRTemp math_PINSRQ_128 ( IRTemp v128, IRTemp u64, UInt imm8 )
{
   /* Surround u64 with zeroes as per imm, giving us something we can
      OR into a suitably masked-out v128. */
   IRTemp withZs = newTemp(Ity_V128);
   UShort mask = 0;
   if (imm8 == 0) {
      mask = 0xFF00;
      assign(withZs, binop(Iop_64HLtoV128, mkU64(0), mkexpr(u64)));
   } else {
      vassert(imm8 == 1);
      mask = 0x00FF;
      assign( withZs, binop(Iop_64HLtoV128, mkexpr(u64), mkU64(0)));
   }

   IRTemp res = newTemp(Ity_V128);
   assign( res, binop( Iop_OrV128,
                       mkexpr(withZs),
                       binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
   return res;
}
static IRTemp math_INSERTPS ( IRTemp dstV, IRTemp toInsertD, UInt imm8 )
{
   const IRTemp inval = IRTemp_INVALID;
   IRTemp dstDs[4] = { inval, inval, inval, inval };
   breakupV128to32s( dstV, &dstDs[3], &dstDs[2], &dstDs[1], &dstDs[0] );

   vassert(imm8 <= 255);
   dstDs[(imm8 >> 4) & 3] = toInsertD; /* "imm8_count_d" */

   UInt imm8_zmask = (imm8 & 15);
   IRTemp zero_32 = newTemp(Ity_I32);
   assign( zero_32, mkU32(0) );
   IRTemp resV = newTemp(Ity_V128);
   assign( resV, mkV128from32s(
                    ((imm8_zmask & 8) == 8) ? zero_32 : dstDs[3],
                    ((imm8_zmask & 4) == 4) ? zero_32 : dstDs[2],
                    ((imm8_zmask & 2) == 2) ? zero_32 : dstDs[1],
                    ((imm8_zmask & 1) == 1) ? zero_32 : dstDs[0]) );
   return resV;
}
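/* Immediate layout, as used above and in the INSERTPS decoder below:
   bits 7:6 of imm8 select the source dword when the source is a
   register ("count_s", applied by the caller), bits 5:4 select the
   destination dword to overwrite ("count_d", the (imm8 >> 4) & 3
   above), and bits 3:0 form a zero-mask ("zmask", the imm8 & 15 above)
   that clears the corresponding result dwords. */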
static Long dis_PEXTRB_128_GtoE ( const VexAbiInfo* vbi, Prefix pfx,
                                  Long delta, Bool isAvx )
{
   IRTemp addr     = IRTemp_INVALID;
   IRTemp xmm_vec  = newTemp(Ity_V128);
   IRTemp sel_lane = newTemp(Ity_I32);
   IRTemp shr_lane = newTemp(Ity_I32);
   const HChar* mbV = isAvx ? "v" : "";
   UChar  modrm    = getUChar(delta);
   IRTemp t3, t2, t1, t0;

   assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
   t3 = t2 = t1 = t0 = IRTemp_INVALID;
   breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );

   if ( epartIsReg( modrm ) ) {
      imm8 = (Int)getUChar(delta+1);
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8 = (Int)getUChar(delta+alen);
   }
   switch ( (imm8 >> 2) & 3 ) {
      case 0: assign( sel_lane, mkexpr(t0) ); break;
      case 1: assign( sel_lane, mkexpr(t1) ); break;
      case 2: assign( sel_lane, mkexpr(t2) ); break;
      case 3: assign( sel_lane, mkexpr(t3) ); break;
      default: vassert(0);
   }
   assign( shr_lane,
           binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) );

   if ( epartIsReg( modrm ) ) {
      putIReg64( eregOfRexRM(pfx,modrm),
                 unop( Iop_32Uto64,
                       binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) );
      DIP( "%spextrb $%d, %s,%s\n", mbV, imm8,
           nameXMMReg( gregOfRexRM(pfx, modrm) ),
           nameIReg64( eregOfRexRM(pfx, modrm) ) );
   } else {
      storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) );
      DIP( "%spextrb $%d,%s,%s\n", mbV,
           imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
   }
static IRTemp math_DPPD_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
{
   vassert(imm8 < 256);
   UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF };
   IRTemp and_vec = newTemp(Ity_V128);
   IRTemp sum_vec = newTemp(Ity_V128);
   IRTemp rm      = newTemp(Ity_I32);
   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( and_vec, binop( Iop_AndV128,
                           triop( Iop_Mul64Fx2,
                                  mkexpr(rm),
                                  mkexpr(dst_vec), mkexpr(src_vec) ),
                           mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) );

   assign( sum_vec, binop( Iop_Add64F0x2,
                           binop( Iop_InterleaveHI64x2,
                                  mkexpr(and_vec), mkexpr(and_vec) ),
                           binop( Iop_InterleaveLO64x2,
                                  mkexpr(and_vec), mkexpr(and_vec) ) ) );
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop( Iop_AndV128,
                      binop( Iop_InterleaveLO64x2,
                             mkexpr(sum_vec), mkexpr(sum_vec) ),
                      mkV128( imm8_perms[ (imm8 & 3) ] ) ) );
   return res;
}
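/* Reading of the immediate, assuming the usual DPPD semantics: the
   high nibble of imm8 selects which of the two 64-bit products
   contribute to the sum (imm8_perms[(imm8 >> 4) & 3] masks them before
   summing), and the low two bits select which result lanes receive the
   sum.  E.g. imm8 == 0x31 keeps both products and writes the sum only
   to the low 64-bit lane. */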
static IRTemp math_DPPS_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
{
   vassert(imm8 < 256);
   IRTemp tmp_prod_vec = newTemp(Ity_V128);
   IRTemp prod_vec     = newTemp(Ity_V128);
   IRTemp sum_vec      = newTemp(Ity_V128);
   IRTemp rm           = newTemp(Ity_I32);
   IRTemp v3, v2, v1, v0;
   v3 = v2 = v1 = v0 = IRTemp_INVALID;
   UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
                             0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
                             0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
                             0xFFFF };

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( tmp_prod_vec,
           binop( Iop_AndV128,
                  triop( Iop_Mul32Fx4,
                         mkexpr(rm), mkexpr(dst_vec), mkexpr(src_vec) ),
                  mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) );
   breakupV128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 );
   assign( prod_vec, mkV128from32s( v3, v1, v2, v0 ) );

   assign( sum_vec, triop( Iop_Add32Fx4,
                           mkexpr(rm),
                           binop( Iop_InterleaveHI32x4,
                                  mkexpr(prod_vec), mkexpr(prod_vec) ),
                           binop( Iop_InterleaveLO32x4,
                                  mkexpr(prod_vec), mkexpr(prod_vec) ) ) );

   IRTemp res = newTemp(Ity_V128);
   assign( res, binop( Iop_AndV128,
                       triop( Iop_Add32Fx4,
                              mkexpr(rm),
                              binop( Iop_InterleaveHI32x4,
                                     mkexpr(sum_vec), mkexpr(sum_vec) ),
                              binop( Iop_InterleaveLO32x4,
                                     mkexpr(sum_vec), mkexpr(sum_vec) ) ),
                       mkV128( imm8_perms[ (imm8 & 15) ] ) ) );
   return res;
}
static IRTemp math_MPSADBW_128 ( IRTemp dst_vec, IRTemp src_vec, UInt imm8 )
{
   /* Mask out bits of the operands we don't need.  This isn't
      strictly necessary, but it does ensure Memcheck doesn't
      give us any false uninitialised value errors as a
      result. */
   UShort src_mask[4] = { 0x000F, 0x00F0, 0x0F00, 0xF000 };
   UShort dst_mask[2] = { 0x07FF, 0x7FF0 };

   IRTemp src_maskV = newTemp(Ity_V128);
   IRTemp dst_maskV = newTemp(Ity_V128);
   assign(src_maskV, mkV128( src_mask[ imm8 & 3 ] ));
   assign(dst_maskV, mkV128( dst_mask[ (imm8 >> 2) & 1 ] ));

   IRTemp src_masked = newTemp(Ity_V128);
   IRTemp dst_masked = newTemp(Ity_V128);
   assign(src_masked, binop(Iop_AndV128, mkexpr(src_vec), mkexpr(src_maskV)));
   assign(dst_masked, binop(Iop_AndV128, mkexpr(dst_vec), mkexpr(dst_maskV)));

   /* Generate 4 64 bit values that we can hand to a clean helper */
   IRTemp sHi = newTemp(Ity_I64);
   IRTemp sLo = newTemp(Ity_I64);
   assign( sHi, unop(Iop_V128HIto64, mkexpr(src_masked)) );
   assign( sLo, unop(Iop_V128to64,   mkexpr(src_masked)) );

   IRTemp dHi = newTemp(Ity_I64);
   IRTemp dLo = newTemp(Ity_I64);
   assign( dHi, unop(Iop_V128HIto64, mkexpr(dst_masked)) );
   assign( dLo, unop(Iop_V128to64,   mkexpr(dst_masked)) );

   /* Compute halves of the result separately */
   IRTemp resHi = newTemp(Ity_I64);
   IRTemp resLo = newTemp(Ity_I64);

   IRExpr** argsHi
      = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
                       mkU64( 0x80 | (imm8 & 7) ));
   IRExpr** argsLo
      = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
                       mkU64( 0x00 | (imm8 & 7) ));

   assign(resHi, mkIRExprCCall( Ity_I64, 0/*regparm*/,
                                "amd64g_calc_mpsadbw",
                                &amd64g_calc_mpsadbw, argsHi ));
   assign(resLo, mkIRExprCCall( Ity_I64, 0/*regparm*/,
                                "amd64g_calc_mpsadbw",
                                &amd64g_calc_mpsadbw, argsLo ));

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo)));
   return res;
}
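/* How the helper call is parameterised, as set up in argsHi/argsLo
   above: the helper receives (imm8 & 7) plus a flag in bit 7 saying
   which half of the result it should compute -- 0x80 | (imm8 & 7) for
   the high 64 bits, 0x00 | (imm8 & 7) for the low 64 bits.  The low
   immediate bits are the usual MPSADBW block selectors (a 4-byte
   source block and an 11-byte destination window), which is also why
   src_mask has four entries and dst_mask two. */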
static Long dis_EXTRACTPS ( const VexAbiInfo* vbi, Prefix pfx,
                            Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = getUChar(delta);

   IRTemp xmm_vec   = newTemp(Ity_V128);
   IRTemp src_dword = newTemp(Ity_I32);
   UInt   rG = gregOfRexRM(pfx,modrm);
   IRTemp t3, t2, t1, t0;
   t3 = t2 = t1 = t0 = IRTemp_INVALID;

   assign( xmm_vec, getXMMReg( rG ) );
   breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );

   if ( epartIsReg( modrm ) ) {
      imm8_10 = (Int)(getUChar(delta+1) & 3);
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8_10 = (Int)(getUChar(delta+alen) & 3);
   }

   switch ( imm8_10 ) {
      case 0: assign( src_dword, mkexpr(t0) ); break;
      case 1: assign( src_dword, mkexpr(t1) ); break;
      case 2: assign( src_dword, mkexpr(t2) ); break;
      case 3: assign( src_dword, mkexpr(t3) ); break;
      default: vassert(0);
   }

   if ( epartIsReg( modrm ) ) {
      UInt rE = eregOfRexRM(pfx,modrm);
      putIReg32( rE, mkexpr(src_dword) );
      DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
           nameXMMReg( rG ), nameIReg32( rE ) );
   } else {
      storeLE( mkexpr(addr), mkexpr(src_dword) );
      DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
           nameXMMReg( rG ), dis_buf );
   }
static IRTemp math_PCLMULQDQ( IRTemp dV, IRTemp sV, UInt imm8 )
{
   IRTemp t0 = newTemp(Ity_I64);
   IRTemp t1 = newTemp(Ity_I64);
   assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64,
                   mkexpr(dV)));
   assign(t1, unop((imm8&16) ? Iop_V128HIto64 : Iop_V128to64,
                   mkexpr(sV)));

   IRTemp t2 = newTemp(Ity_I64);
   IRTemp t3 = newTemp(Ity_I64);

   IRExpr** args;

   args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0));
   assign(t2, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
                            &amd64g_calculate_pclmul, args));
   args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1));
   assign(t3, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
                            &amd64g_calculate_pclmul, args));

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)));
   return res;
}
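/* Carry-less ("polynomial over GF(2)") multiplication differs from
   ordinary multiplication only in that partial products are combined
   with XOR instead of ADD.  Tiny worked example: 0b11 * 0b11 is
   0b11 ^ (0b11 << 1) = 0b101, i.e. (x+1)*(x+1) = x^2 + 1 over GF(2).
   The helper is called twice because the 64x64 -> 128 bit product
   comes back as two 64-bit halves; the third argument selects which
   half. */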
__attribute__((noinline))
static Long dis_ESC_0F3A__SSE4 ( Bool* decode_OK,
                                 const VexAbiInfo* vbi,
                                 Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr = IRTemp_INVALID;

   *decode_OK = False;

   Long  delta = deltaIN;
   UChar opc   = getUChar(delta);
19265 if (have66noF2noF3(pfx
) && sz
== 2) {
19267 IRTemp src0
= newTemp(Ity_F32
);
19268 IRTemp src1
= newTemp(Ity_F32
);
19269 IRTemp src2
= newTemp(Ity_F32
);
19270 IRTemp src3
= newTemp(Ity_F32
);
19271 IRTemp res0
= newTemp(Ity_F32
);
19272 IRTemp res1
= newTemp(Ity_F32
);
19273 IRTemp res2
= newTemp(Ity_F32
);
19274 IRTemp res3
= newTemp(Ity_F32
);
19275 IRTemp rm
= newTemp(Ity_I32
);
19278 modrm
= getUChar(delta
);
19280 if (epartIsReg(modrm
)) {
19282 getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 0 ) );
19284 getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 1 ) );
19286 getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 2 ) );
19288 getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 3 ) );
19289 imm
= getUChar(delta
+1);
19290 if (imm
& ~15) goto decode_failure
;
19292 DIP( "roundps $%d,%s,%s\n",
19293 imm
, nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19294 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19296 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19297 gen_SIGNAL_if_not_16_aligned(vbi
, addr
);
19298 assign( src0
, loadLE(Ity_F32
,
19299 binop(Iop_Add64
, mkexpr(addr
), mkU64(0) )));
19300 assign( src1
, loadLE(Ity_F32
,
19301 binop(Iop_Add64
, mkexpr(addr
), mkU64(4) )));
19302 assign( src2
, loadLE(Ity_F32
,
19303 binop(Iop_Add64
, mkexpr(addr
), mkU64(8) )));
19304 assign( src3
, loadLE(Ity_F32
,
19305 binop(Iop_Add64
, mkexpr(addr
), mkU64(12) )));
19306 imm
= getUChar(delta
+alen
);
19307 if (imm
& ~15) goto decode_failure
;
19309 DIP( "roundps $%d,%s,%s\n",
19310 imm
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19313 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19314 that encoding is the same as the encoding for IRRoundingMode,
19315 we can use that value directly in the IR as a rounding
19317 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
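      /* For reference (assuming the standard SSE/IRRoundingMode
         encodings): 0 = round to nearest even, 1 = round towards
         -infinity, 2 = round towards +infinity, 3 = round towards
         zero.  Bit 2 of imm says "use MXCSR.RC instead of the
         immediate encoding", which is what the get_sse_roundingmode()
         arm above implements. */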
      assign(res0, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src0)) );
      assign(res1, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src1)) );
      assign(res2, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src2)) );
      assign(res3, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src3)) );

      putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
      putXMMRegLane32F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
      putXMMRegLane32F( gregOfRexRM(pfx, modrm), 2, mkexpr(res2) );
      putXMMRegLane32F( gregOfRexRM(pfx, modrm), 3, mkexpr(res3) );

      goto decode_success;
   }
19334 /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */
19335 if (have66noF2noF3(pfx
) && sz
== 2) {
19337 IRTemp src0
= newTemp(Ity_F64
);
19338 IRTemp src1
= newTemp(Ity_F64
);
19339 IRTemp res0
= newTemp(Ity_F64
);
19340 IRTemp res1
= newTemp(Ity_F64
);
19341 IRTemp rm
= newTemp(Ity_I32
);
19344 modrm
= getUChar(delta
);
19346 if (epartIsReg(modrm
)) {
19348 getXMMRegLane64F( eregOfRexRM(pfx
, modrm
), 0 ) );
19350 getXMMRegLane64F( eregOfRexRM(pfx
, modrm
), 1 ) );
19351 imm
= getUChar(delta
+1);
19352 if (imm
& ~15) goto decode_failure
;
19354 DIP( "roundpd $%d,%s,%s\n",
19355 imm
, nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19356 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19358 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19359 gen_SIGNAL_if_not_16_aligned(vbi
, addr
);
19360 assign( src0
, loadLE(Ity_F64
,
19361 binop(Iop_Add64
, mkexpr(addr
), mkU64(0) )));
19362 assign( src1
, loadLE(Ity_F64
,
19363 binop(Iop_Add64
, mkexpr(addr
), mkU64(8) )));
19364 imm
= getUChar(delta
+alen
);
19365 if (imm
& ~15) goto decode_failure
;
19367 DIP( "roundpd $%d,%s,%s\n",
19368 imm
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19371 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19372 that encoding is the same as the encoding for IRRoundingMode,
19373 we can use that value directly in the IR as a rounding
19375 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
19377 assign(res0
, binop(Iop_RoundF64toInt
, mkexpr(rm
), mkexpr(src0
)) );
19378 assign(res1
, binop(Iop_RoundF64toInt
, mkexpr(rm
), mkexpr(src1
)) );
19380 putXMMRegLane64F( gregOfRexRM(pfx
, modrm
), 0, mkexpr(res0
) );
19381 putXMMRegLane64F( gregOfRexRM(pfx
, modrm
), 1, mkexpr(res1
) );
19383 goto decode_success
;
19389 /* 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
19390 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
19392 if (have66noF2noF3(pfx
) && sz
== 2) {
19394 Bool isD
= opc
== 0x0B;
19395 IRTemp src
= newTemp(isD
? Ity_F64
: Ity_F32
);
19396 IRTemp res
= newTemp(isD
? Ity_F64
: Ity_F32
);
19399 modrm
= getUChar(delta
);
19401 if (epartIsReg(modrm
)) {
19403 isD
? getXMMRegLane64F( eregOfRexRM(pfx
, modrm
), 0 )
19404 : getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 0 ) );
19405 imm
= getUChar(delta
+1);
19406 if (imm
& ~15) goto decode_failure
;
19408 DIP( "rounds%c $%d,%s,%s\n",
19410 imm
, nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19411 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19413 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19414 assign( src
, loadLE( isD
? Ity_F64
: Ity_F32
, mkexpr(addr
) ));
19415 imm
= getUChar(delta
+alen
);
19416 if (imm
& ~15) goto decode_failure
;
19418 DIP( "rounds%c $%d,%s,%s\n",
19420 imm
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19423 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19424 that encoding is the same as the encoding for IRRoundingMode,
19425 we can use that value directly in the IR as a rounding
19427 assign(res
, binop(isD
? Iop_RoundF64toInt
: Iop_RoundF32toInt
,
19428 (imm
& 4) ? get_sse_roundingmode()
19433 putXMMRegLane64F( gregOfRexRM(pfx
, modrm
), 0, mkexpr(res
) );
19435 putXMMRegLane32F( gregOfRexRM(pfx
, modrm
), 0, mkexpr(res
) );
19437 goto decode_success
;
19442 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8
19443 Blend Packed Single Precision Floating-Point Values (XMM) */
19444 if (have66noF2noF3(pfx
) && sz
== 2) {
19447 IRTemp dst_vec
= newTemp(Ity_V128
);
19448 IRTemp src_vec
= newTemp(Ity_V128
);
19450 modrm
= getUChar(delta
);
19452 assign( dst_vec
, getXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19454 if ( epartIsReg( modrm
) ) {
19455 imm8
= (Int
)getUChar(delta
+1);
19456 assign( src_vec
, getXMMReg( eregOfRexRM(pfx
, modrm
) ) );
19458 DIP( "blendps $%d, %s,%s\n", imm8
,
19459 nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19460 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19462 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19463 1/* imm8 is 1 byte after the amode */ );
19464 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
19465 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19466 imm8
= (Int
)getUChar(delta
+alen
);
19468 DIP( "blendps $%d, %s,%s\n",
19469 imm8
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19472 putXMMReg( gregOfRexRM(pfx
, modrm
),
19473 mkexpr( math_BLENDPS_128( src_vec
, dst_vec
, imm8
) ) );
19474 goto decode_success
;
19479 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8
19480 Blend Packed Double Precision Floating-Point Values (XMM) */
19481 if (have66noF2noF3(pfx
) && sz
== 2) {
19484 IRTemp dst_vec
= newTemp(Ity_V128
);
19485 IRTemp src_vec
= newTemp(Ity_V128
);
19487 modrm
= getUChar(delta
);
19488 assign( dst_vec
, getXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19490 if ( epartIsReg( modrm
) ) {
19491 imm8
= (Int
)getUChar(delta
+1);
19492 assign( src_vec
, getXMMReg( eregOfRexRM(pfx
, modrm
) ) );
19494 DIP( "blendpd $%d, %s,%s\n", imm8
,
19495 nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19496 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19498 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19499 1/* imm8 is 1 byte after the amode */ );
19500 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
19501 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19502 imm8
= (Int
)getUChar(delta
+alen
);
19504 DIP( "blendpd $%d, %s,%s\n",
19505 imm8
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19508 putXMMReg( gregOfRexRM(pfx
, modrm
),
19509 mkexpr( math_BLENDPD_128( src_vec
, dst_vec
, imm8
) ) );
19510 goto decode_success
;
19515 /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8
19516 Blend Packed Words (XMM) */
19517 if (have66noF2noF3(pfx
) && sz
== 2) {
19520 IRTemp dst_vec
= newTemp(Ity_V128
);
19521 IRTemp src_vec
= newTemp(Ity_V128
);
19523 modrm
= getUChar(delta
);
19525 assign( dst_vec
, getXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19527 if ( epartIsReg( modrm
) ) {
19528 imm8
= (Int
)getUChar(delta
+1);
19529 assign( src_vec
, getXMMReg( eregOfRexRM(pfx
, modrm
) ) );
19531 DIP( "pblendw $%d, %s,%s\n", imm8
,
19532 nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19533 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19535 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19536 1/* imm8 is 1 byte after the amode */ );
19537 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
19538 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19539 imm8
= (Int
)getUChar(delta
+alen
);
19541 DIP( "pblendw $%d, %s,%s\n",
19542 imm8
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19545 putXMMReg( gregOfRexRM(pfx
, modrm
),
19546 mkexpr( math_PBLENDW_128( src_vec
, dst_vec
, imm8
) ) );
19547 goto decode_success
;
19552 /* 66 0F 3A 14 /r ib = PEXTRB r/m16, xmm, imm8
19553 Extract Byte from xmm, store in mem or zero-extend + store in gen.reg.
19555 if (have66noF2noF3(pfx
) && sz
== 2) {
19556 delta
= dis_PEXTRB_128_GtoE( vbi
, pfx
, delta
, False
/*!isAvx*/ );
19557 goto decode_success
;
19562 /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8
19563 Extract Word from xmm, store in mem or zero-extend + store in gen.reg.
19565 if (have66noF2noF3(pfx
) && sz
== 2) {
19566 delta
= dis_PEXTRW( vbi
, pfx
, delta
, False
/*!isAvx*/ );
19567 goto decode_success
;
19572 /* 66 no-REX.W 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8
19573 Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM)
19574 Note that this insn has the same opcodes as PEXTRQ, but
19575 here the REX.W bit is _not_ present */
19576 if (have66noF2noF3(pfx
)
19577 && sz
== 2 /* REX.W is _not_ present */) {
19578 delta
= dis_PEXTRD( vbi
, pfx
, delta
, False
/*!isAvx*/ );
19579 goto decode_success
;
19581 /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8
19582 Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM)
19583 Note that this insn has the same opcodes as PEXTRD, but
19584 here the REX.W bit is present */
19585 if (have66noF2noF3(pfx
)
19586 && sz
== 8 /* REX.W is present */) {
19587 delta
= dis_PEXTRQ( vbi
, pfx
, delta
, False
/*!isAvx*/);
19588 goto decode_success
;
19593 /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract
19594 float from xmm reg and store in gen.reg or mem. This is
19595 identical to PEXTRD, except that REX.W appears to be ignored.
19597 if (have66noF2noF3(pfx
)
19598 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
19599 delta
= dis_EXTRACTPS( vbi
, pfx
, delta
, False
/*!isAvx*/ );
19600 goto decode_success
;
19605 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8
19606 Extract byte from r32/m8 and insert into xmm1 */
19607 if (have66noF2noF3(pfx
) && sz
== 2) {
19609 IRTemp new8
= newTemp(Ity_I8
);
19610 modrm
= getUChar(delta
);
19611 UInt rG
= gregOfRexRM(pfx
, modrm
);
19612 if ( epartIsReg( modrm
) ) {
19613 UInt rE
= eregOfRexRM(pfx
,modrm
);
19614 imm8
= (Int
)(getUChar(delta
+1) & 0xF);
19615 assign( new8
, unop(Iop_32to8
, getIReg32(rE
)) );
19617 DIP( "pinsrb $%d,%s,%s\n", imm8
,
19618 nameIReg32(rE
), nameXMMReg(rG
) );
19620 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19621 imm8
= (Int
)(getUChar(delta
+alen
) & 0xF);
19622 assign( new8
, loadLE( Ity_I8
, mkexpr(addr
) ) );
19624 DIP( "pinsrb $%d,%s,%s\n",
19625 imm8
, dis_buf
, nameXMMReg(rG
) );
19627 IRTemp src_vec
= newTemp(Ity_V128
);
19628 assign(src_vec
, getXMMReg( gregOfRexRM(pfx
, modrm
) ));
19629 IRTemp res
= math_PINSRB_128( src_vec
, new8
, imm8
);
19630 putXMMReg( rG
, mkexpr(res
) );
19631 goto decode_success
;
19636 /* 66 0F 3A 21 /r ib = INSERTPS imm8, xmm2/m32, xmm1
19637 Insert Packed Single Precision Floating-Point Value (XMM) */
19638 if (have66noF2noF3(pfx
) && sz
== 2) {
19640 IRTemp d2ins
= newTemp(Ity_I32
); /* comes from the E part */
19641 const IRTemp inval
= IRTemp_INVALID
;
19643 modrm
= getUChar(delta
);
19644 UInt rG
= gregOfRexRM(pfx
, modrm
);
19646 if ( epartIsReg( modrm
) ) {
19647 UInt rE
= eregOfRexRM(pfx
, modrm
);
19648 IRTemp vE
= newTemp(Ity_V128
);
19649 assign( vE
, getXMMReg(rE
) );
19650 IRTemp dsE
[4] = { inval
, inval
, inval
, inval
};
19651 breakupV128to32s( vE
, &dsE
[3], &dsE
[2], &dsE
[1], &dsE
[0] );
19652 imm8
= getUChar(delta
+1);
19653 d2ins
= dsE
[(imm8
>> 6) & 3]; /* "imm8_count_s" */
19655 DIP( "insertps $%u, %s,%s\n",
19656 imm8
, nameXMMReg(rE
), nameXMMReg(rG
) );
19658 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19659 assign( d2ins
, loadLE( Ity_I32
, mkexpr(addr
) ) );
19660 imm8
= getUChar(delta
+alen
);
19662 DIP( "insertps $%u, %s,%s\n",
19663 imm8
, dis_buf
, nameXMMReg(rG
) );
19666 IRTemp vG
= newTemp(Ity_V128
);
19667 assign( vG
, getXMMReg(rG
) );
19669 putXMMReg( rG
, mkexpr(math_INSERTPS( vG
, d2ins
, imm8
)) );
19670 goto decode_success
;
19675 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8
19676 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */
19677 if (have66noF2noF3(pfx
)
19678 && sz
== 2 /* REX.W is NOT present */) {
19680 IRTemp src_u32
= newTemp(Ity_I32
);
19681 modrm
= getUChar(delta
);
19682 UInt rG
= gregOfRexRM(pfx
, modrm
);
19684 if ( epartIsReg( modrm
) ) {
19685 UInt rE
= eregOfRexRM(pfx
,modrm
);
19686 imm8_10
= (Int
)(getUChar(delta
+1) & 3);
19687 assign( src_u32
, getIReg32( rE
) );
19689 DIP( "pinsrd $%d, %s,%s\n",
19690 imm8_10
, nameIReg32(rE
), nameXMMReg(rG
) );
19692 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19693 imm8_10
= (Int
)(getUChar(delta
+alen
) & 3);
19694 assign( src_u32
, loadLE( Ity_I32
, mkexpr(addr
) ) );
19696 DIP( "pinsrd $%d, %s,%s\n",
19697 imm8_10
, dis_buf
, nameXMMReg(rG
) );
19700 IRTemp src_vec
= newTemp(Ity_V128
);
19701 assign(src_vec
, getXMMReg( rG
));
19702 IRTemp res_vec
= math_PINSRD_128( src_vec
, src_u32
, imm8_10
);
19703 putXMMReg( rG
, mkexpr(res_vec
) );
19704 goto decode_success
;
19706 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8
19707 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */
19708 if (have66noF2noF3(pfx
)
19709 && sz
== 8 /* REX.W is present */) {
19711 IRTemp src_u64
= newTemp(Ity_I64
);
19712 modrm
= getUChar(delta
);
19713 UInt rG
= gregOfRexRM(pfx
, modrm
);
19715 if ( epartIsReg( modrm
) ) {
19716 UInt rE
= eregOfRexRM(pfx
,modrm
);
19717 imm8_0
= (Int
)(getUChar(delta
+1) & 1);
19718 assign( src_u64
, getIReg64( rE
) );
19720 DIP( "pinsrq $%d, %s,%s\n",
19721 imm8_0
, nameIReg64(rE
), nameXMMReg(rG
) );
19723 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19724 imm8_0
= (Int
)(getUChar(delta
+alen
) & 1);
19725 assign( src_u64
, loadLE( Ity_I64
, mkexpr(addr
) ) );
19727 DIP( "pinsrq $%d, %s,%s\n",
19728 imm8_0
, dis_buf
, nameXMMReg(rG
) );
19731 IRTemp src_vec
= newTemp(Ity_V128
);
19732 assign(src_vec
, getXMMReg( rG
));
19733 IRTemp res_vec
= math_PINSRQ_128( src_vec
, src_u64
, imm8_0
);
19734 putXMMReg( rG
, mkexpr(res_vec
) );
19735 goto decode_success
;
19740 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8
19741 Dot Product of Packed Single Precision Floating-Point Values (XMM) */
19742 if (have66noF2noF3(pfx
) && sz
== 2) {
19743 modrm
= getUChar(delta
);
19745 IRTemp src_vec
= newTemp(Ity_V128
);
19746 IRTemp dst_vec
= newTemp(Ity_V128
);
19747 UInt rG
= gregOfRexRM(pfx
, modrm
);
19748 assign( dst_vec
, getXMMReg( rG
) );
19749 if ( epartIsReg( modrm
) ) {
19750 UInt rE
= eregOfRexRM(pfx
, modrm
);
19751 imm8
= (Int
)getUChar(delta
+1);
19752 assign( src_vec
, getXMMReg(rE
) );
19754 DIP( "dpps $%d, %s,%s\n",
19755 imm8
, nameXMMReg(rE
), nameXMMReg(rG
) );
19757 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19758 1/* imm8 is 1 byte after the amode */ );
19759 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
19760 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19761 imm8
= (Int
)getUChar(delta
+alen
);
19763 DIP( "dpps $%d, %s,%s\n",
19764 imm8
, dis_buf
, nameXMMReg(rG
) );
19766 IRTemp res
= math_DPPS_128( src_vec
, dst_vec
, imm8
);
19767 putXMMReg( rG
, mkexpr(res
) );
19768 goto decode_success
;
19773 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8
19774 Dot Product of Packed Double Precision Floating-Point Values (XMM) */
19775 if (have66noF2noF3(pfx
) && sz
== 2) {
19776 modrm
= getUChar(delta
);
19778 IRTemp src_vec
= newTemp(Ity_V128
);
19779 IRTemp dst_vec
= newTemp(Ity_V128
);
19780 UInt rG
= gregOfRexRM(pfx
, modrm
);
19781 assign( dst_vec
, getXMMReg( rG
) );
19782 if ( epartIsReg( modrm
) ) {
19783 UInt rE
= eregOfRexRM(pfx
, modrm
);
19784 imm8
= (Int
)getUChar(delta
+1);
19785 assign( src_vec
, getXMMReg(rE
) );
19787 DIP( "dppd $%d, %s,%s\n",
19788 imm8
, nameXMMReg(rE
), nameXMMReg(rG
) );
19790 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19791 1/* imm8 is 1 byte after the amode */ );
19792 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
19793 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19794 imm8
= (Int
)getUChar(delta
+alen
);
19796 DIP( "dppd $%d, %s,%s\n",
19797 imm8
, dis_buf
, nameXMMReg(rG
) );
19799 IRTemp res
= math_DPPD_128( src_vec
, dst_vec
, imm8
);
19800 putXMMReg( rG
, mkexpr(res
) );
19801 goto decode_success
;
19806 /* 66 0F 3A 42 /r ib = MPSADBW xmm1, xmm2/m128, imm8
19807 Multiple Packed Sums of Absolule Difference (XMM) */
19808 if (have66noF2noF3(pfx
) && sz
== 2) {
19810 IRTemp src_vec
= newTemp(Ity_V128
);
19811 IRTemp dst_vec
= newTemp(Ity_V128
);
19812 modrm
= getUChar(delta
);
19813 UInt rG
= gregOfRexRM(pfx
, modrm
);
19815 assign( dst_vec
, getXMMReg(rG
) );
19817 if ( epartIsReg( modrm
) ) {
19818 UInt rE
= eregOfRexRM(pfx
, modrm
);
19820 imm8
= (Int
)getUChar(delta
+1);
19821 assign( src_vec
, getXMMReg(rE
) );
19823 DIP( "mpsadbw $%d, %s,%s\n", imm8
,
19824 nameXMMReg(rE
), nameXMMReg(rG
) );
19826 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19827 1/* imm8 is 1 byte after the amode */ );
19828 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
19829 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19830 imm8
= (Int
)getUChar(delta
+alen
);
19832 DIP( "mpsadbw $%d, %s,%s\n", imm8
, dis_buf
, nameXMMReg(rG
) );
19835 putXMMReg( rG
, mkexpr( math_MPSADBW_128(dst_vec
, src_vec
, imm8
) ) );
19836 goto decode_success
;
19841 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
19842 * Carry-less multiplication of selected XMM quadwords into XMM
19843 * registers (a.k.a multiplication of polynomials over GF(2))
19845 if (have66noF2noF3(pfx
) && sz
== 2) {
19848 IRTemp svec
= newTemp(Ity_V128
);
19849 IRTemp dvec
= newTemp(Ity_V128
);
19850 modrm
= getUChar(delta
);
19851 UInt rG
= gregOfRexRM(pfx
, modrm
);
19853 assign( dvec
, getXMMReg(rG
) );
19855 if ( epartIsReg( modrm
) ) {
19856 UInt rE
= eregOfRexRM(pfx
, modrm
);
19857 imm8
= (Int
)getUChar(delta
+1);
19858 assign( svec
, getXMMReg(rE
) );
19860 DIP( "pclmulqdq $%d, %s,%s\n", imm8
,
19861 nameXMMReg(rE
), nameXMMReg(rG
) );
19863 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19864 1/* imm8 is 1 byte after the amode */ );
19865 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
19866 assign( svec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19867 imm8
= (Int
)getUChar(delta
+alen
);
19869 DIP( "pclmulqdq $%d, %s,%s\n",
19870 imm8
, dis_buf
, nameXMMReg(rG
) );
19873 putXMMReg( rG
, mkexpr( math_PCLMULQDQ(dvec
, svec
, imm8
) ) );
19874 goto decode_success
;
19882 /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1
19883 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1
19884 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1
19885 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1
19886 (selected special cases that actually occur in glibc,
19887 not by any means a complete implementation.)
19889 if (have66noF2noF3(pfx
) && sz
== 2) {
19890 Long delta0
= delta
;
19891 delta
= dis_PCMPxSTRx( vbi
, pfx
, delta
, False
/*!isAvx*/, opc
);
19892 if (delta
> delta0
) goto decode_success
;
19893 /* else fall though; dis_PCMPxSTRx failed to decode it */
19898 /* 66 0F 3A DF /r ib = AESKEYGENASSIST imm8, xmm2/m128, xmm1 */
19899 if (have66noF2noF3(pfx
) && sz
== 2) {
19900 delta
= dis_AESKEYGENASSIST( vbi
, pfx
, delta
, False
/*!isAvx*/ );
19901 goto decode_success
;
19911 *decode_OK
= False
;
19920 /*------------------------------------------------------------*/
19922 /*--- Top-level post-escape decoders: dis_ESC_NONE ---*/
19924 /*------------------------------------------------------------*/
19926 __attribute__((noinline
))
19928 Long
dis_ESC_NONE (
19929 /*MB_OUT*/DisResult
* dres
,
19930 /*MB_OUT*/Bool
* expect_CAS
,
19931 const VexArchInfo
* archinfo
,
19932 const VexAbiInfo
* vbi
,
19933 Prefix pfx
, Int sz
, Long deltaIN
19938 IRTemp addr
= IRTemp_INVALID
;
19939 IRTemp t1
= IRTemp_INVALID
;
19940 IRTemp t2
= IRTemp_INVALID
;
19941 IRTemp t3
= IRTemp_INVALID
;
19942 IRTemp t4
= IRTemp_INVALID
;
19943 IRTemp t5
= IRTemp_INVALID
;
19944 IRType ty
= Ity_INVALID
;
19951 Long delta
= deltaIN
;
19952 UChar opc
= getUChar(delta
); delta
++;
19954 /* delta now points at the modrm byte. In most of the cases that
19955 follow, neither the F2 nor F3 prefixes are allowed. However,
19956 for some basic arithmetic operations we have to allow F2/XACQ or
19957 F3/XREL in the case where the destination is memory and the LOCK
19958 prefix is also present. Do this check by looking at the modrm
19959 byte but not advancing delta over it. */
19960 /* By default, F2 and F3 are not allowed, so let's start off with
19962 Bool validF2orF3
= haveF2orF3(pfx
) ? False
: True
;
19963 { UChar tmp_modrm
= getUChar(delta
);
19965 case 0x00: /* ADD Gb,Eb */ case 0x01: /* ADD Gv,Ev */
19966 case 0x08: /* OR Gb,Eb */ case 0x09: /* OR Gv,Ev */
19967 case 0x10: /* ADC Gb,Eb */ case 0x11: /* ADC Gv,Ev */
19968 case 0x18: /* SBB Gb,Eb */ case 0x19: /* SBB Gv,Ev */
19969 case 0x20: /* AND Gb,Eb */ case 0x21: /* AND Gv,Ev */
19970 case 0x28: /* SUB Gb,Eb */ case 0x29: /* SUB Gv,Ev */
19971 case 0x30: /* XOR Gb,Eb */ case 0x31: /* XOR Gv,Ev */
19972 if (!epartIsReg(tmp_modrm
)
19973 && haveF2orF3(pfx
) && !haveF2andF3(pfx
) && haveLOCK(pfx
)) {
19974 /* dst is mem, and we have F2 or F3 but not both */
19975 validF2orF3
= True
;
19983 /* Now, in the switch below, for the opc values examined by the
19984 switch above, use validF2orF3 rather than looking at pfx
19988 case 0x00: /* ADD Gb,Eb */
19989 if (!validF2orF3
) goto decode_failure
;
19990 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Add8
, WithFlagNone
, True
, 1, delta
, "add" );
19992 case 0x01: /* ADD Gv,Ev */
19993 if (!validF2orF3
) goto decode_failure
;
19994 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Add8
, WithFlagNone
, True
, sz
, delta
, "add" );
19997 case 0x02: /* ADD Eb,Gb */
19998 if (haveF2orF3(pfx
)) goto decode_failure
;
19999 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagNone
, True
, 1, delta
, "add" );
20001 case 0x03: /* ADD Ev,Gv */
20002 if (haveF2orF3(pfx
)) goto decode_failure
;
20003 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagNone
, True
, sz
, delta
, "add" );
20006 case 0x04: /* ADD Ib, AL */
20007 if (haveF2orF3(pfx
)) goto decode_failure
;
20008 delta
= dis_op_imm_A( 1, False
, Iop_Add8
, True
, delta
, "add" );
20010 case 0x05: /* ADD Iv, eAX */
20011 if (haveF2orF3(pfx
)) goto decode_failure
;
20012 delta
= dis_op_imm_A(sz
, False
, Iop_Add8
, True
, delta
, "add" );
20015 case 0x08: /* OR Gb,Eb */
20016 if (!validF2orF3
) goto decode_failure
;
20017 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Or8
, WithFlagNone
, True
, 1, delta
, "or" );
20019 case 0x09: /* OR Gv,Ev */
20020 if (!validF2orF3
) goto decode_failure
;
20021 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Or8
, WithFlagNone
, True
, sz
, delta
, "or" );
20024 case 0x0A: /* OR Eb,Gb */
20025 if (haveF2orF3(pfx
)) goto decode_failure
;
20026 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Or8
, WithFlagNone
, True
, 1, delta
, "or" );
20028 case 0x0B: /* OR Ev,Gv */
20029 if (haveF2orF3(pfx
)) goto decode_failure
;
20030 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Or8
, WithFlagNone
, True
, sz
, delta
, "or" );
20033 case 0x0C: /* OR Ib, AL */
20034 if (haveF2orF3(pfx
)) goto decode_failure
;
20035 delta
= dis_op_imm_A( 1, False
, Iop_Or8
, True
, delta
, "or" );
20037 case 0x0D: /* OR Iv, eAX */
20038 if (haveF2orF3(pfx
)) goto decode_failure
;
20039 delta
= dis_op_imm_A( sz
, False
, Iop_Or8
, True
, delta
, "or" );
20042 case 0x10: /* ADC Gb,Eb */
20043 if (!validF2orF3
) goto decode_failure
;
20044 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Add8
, WithFlagCarry
, True
, 1, delta
, "adc" );
20046 case 0x11: /* ADC Gv,Ev */
20047 if (!validF2orF3
) goto decode_failure
;
20048 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Add8
, WithFlagCarry
, True
, sz
, delta
, "adc" );
20051 case 0x12: /* ADC Eb,Gb */
20052 if (haveF2orF3(pfx
)) goto decode_failure
;
20053 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagCarry
, True
, 1, delta
, "adc" );
20055 case 0x13: /* ADC Ev,Gv */
20056 if (haveF2orF3(pfx
)) goto decode_failure
;
20057 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagCarry
, True
, sz
, delta
, "adc" );
20060 case 0x14: /* ADC Ib, AL */
20061 if (haveF2orF3(pfx
)) goto decode_failure
;
20062 delta
= dis_op_imm_A( 1, True
, Iop_Add8
, True
, delta
, "adc" );
20064 case 0x15: /* ADC Iv, eAX */
20065 if (haveF2orF3(pfx
)) goto decode_failure
;
20066 delta
= dis_op_imm_A( sz
, True
, Iop_Add8
, True
, delta
, "adc" );
20069 case 0x18: /* SBB Gb,Eb */
20070 if (!validF2orF3
) goto decode_failure
;
20071 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagCarry
, True
, 1, delta
, "sbb" );
20073 case 0x19: /* SBB Gv,Ev */
20074 if (!validF2orF3
) goto decode_failure
;
20075 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagCarry
, True
, sz
, delta
, "sbb" );
20078 case 0x1A: /* SBB Eb,Gb */
20079 if (haveF2orF3(pfx
)) goto decode_failure
;
20080 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagCarry
, True
, 1, delta
, "sbb" );
20082 case 0x1B: /* SBB Ev,Gv */
20083 if (haveF2orF3(pfx
)) goto decode_failure
;
20084 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagCarry
, True
, sz
, delta
, "sbb" );
20087 case 0x1C: /* SBB Ib, AL */
20088 if (haveF2orF3(pfx
)) goto decode_failure
;
20089 delta
= dis_op_imm_A( 1, True
, Iop_Sub8
, True
, delta
, "sbb" );
20091 case 0x1D: /* SBB Iv, eAX */
20092 if (haveF2orF3(pfx
)) goto decode_failure
;
20093 delta
= dis_op_imm_A( sz
, True
, Iop_Sub8
, True
, delta
, "sbb" );
20096 case 0x20: /* AND Gb,Eb */
20097 if (!validF2orF3
) goto decode_failure
;
20098 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_And8
, WithFlagNone
, True
, 1, delta
, "and" );
20100 case 0x21: /* AND Gv,Ev */
20101 if (!validF2orF3
) goto decode_failure
;
20102 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_And8
, WithFlagNone
, True
, sz
, delta
, "and" );
20105 case 0x22: /* AND Eb,Gb */
20106 if (haveF2orF3(pfx
)) goto decode_failure
;
20107 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_And8
, WithFlagNone
, True
, 1, delta
, "and" );
20109 case 0x23: /* AND Ev,Gv */
20110 if (haveF2orF3(pfx
)) goto decode_failure
;
20111 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_And8
, WithFlagNone
, True
, sz
, delta
, "and" );
20114 case 0x24: /* AND Ib, AL */
20115 if (haveF2orF3(pfx
)) goto decode_failure
;
20116 delta
= dis_op_imm_A( 1, False
, Iop_And8
, True
, delta
, "and" );
20118 case 0x25: /* AND Iv, eAX */
20119 if (haveF2orF3(pfx
)) goto decode_failure
;
20120 delta
= dis_op_imm_A( sz
, False
, Iop_And8
, True
, delta
, "and" );
20123 case 0x28: /* SUB Gb,Eb */
20124 if (!validF2orF3
) goto decode_failure
;
20125 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, True
, 1, delta
, "sub" );
20127 case 0x29: /* SUB Gv,Ev */
20128 if (!validF2orF3
) goto decode_failure
;
20129 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, True
, sz
, delta
, "sub" );
20132 case 0x2A: /* SUB Eb,Gb */
20133 if (haveF2orF3(pfx
)) goto decode_failure
;
20134 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, True
, 1, delta
, "sub" );
20136 case 0x2B: /* SUB Ev,Gv */
20137 if (haveF2orF3(pfx
)) goto decode_failure
;
20138 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, True
, sz
, delta
, "sub" );
20141 case 0x2C: /* SUB Ib, AL */
20142 if (haveF2orF3(pfx
)) goto decode_failure
;
20143 delta
= dis_op_imm_A(1, False
, Iop_Sub8
, True
, delta
, "sub" );
20145 case 0x2D: /* SUB Iv, eAX */
20146 if (haveF2orF3(pfx
)) goto decode_failure
;
20147 delta
= dis_op_imm_A( sz
, False
, Iop_Sub8
, True
, delta
, "sub" );
20150 case 0x30: /* XOR Gb,Eb */
20151 if (!validF2orF3
) goto decode_failure
;
20152 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Xor8
, WithFlagNone
, True
, 1, delta
, "xor" );
20154 case 0x31: /* XOR Gv,Ev */
20155 if (!validF2orF3
) goto decode_failure
;
20156 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Xor8
, WithFlagNone
, True
, sz
, delta
, "xor" );
20159 case 0x32: /* XOR Eb,Gb */
20160 if (haveF2orF3(pfx
)) goto decode_failure
;
20161 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Xor8
, WithFlagNone
, True
, 1, delta
, "xor" );
20163 case 0x33: /* XOR Ev,Gv */
20164 if (haveF2orF3(pfx
)) goto decode_failure
;
20165 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Xor8
, WithFlagNone
, True
, sz
, delta
, "xor" );
20168 case 0x34: /* XOR Ib, AL */
20169 if (haveF2orF3(pfx
)) goto decode_failure
;
20170 delta
= dis_op_imm_A( 1, False
, Iop_Xor8
, True
, delta
, "xor" );
20172 case 0x35: /* XOR Iv, eAX */
20173 if (haveF2orF3(pfx
)) goto decode_failure
;
20174 delta
= dis_op_imm_A( sz
, False
, Iop_Xor8
, True
, delta
, "xor" );
20177 case 0x38: /* CMP Gb,Eb */
20178 if (haveF2orF3(pfx
)) goto decode_failure
;
20179 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, False
, 1, delta
, "cmp" );
20181 case 0x39: /* CMP Gv,Ev */
20182 if (haveF2orF3(pfx
)) goto decode_failure
;
20183 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, False
, sz
, delta
, "cmp" );
20186 case 0x3A: /* CMP Eb,Gb */
20187 if (haveF2orF3(pfx
)) goto decode_failure
;
20188 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, False
, 1, delta
, "cmp" );
20190 case 0x3B: /* CMP Ev,Gv */
20191 if (haveF2orF3(pfx
)) goto decode_failure
;
20192 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, False
, sz
, delta
, "cmp" );
20195 case 0x3C: /* CMP Ib, AL */
20196 if (haveF2orF3(pfx
)) goto decode_failure
;
20197 delta
= dis_op_imm_A( 1, False
, Iop_Sub8
, False
, delta
, "cmp" );
20199 case 0x3D: /* CMP Iv, eAX */
20200 if (haveF2orF3(pfx
)) goto decode_failure
;
20201 delta
= dis_op_imm_A( sz
, False
, Iop_Sub8
, False
, delta
, "cmp" );
20204 case 0x50: /* PUSH eAX */
20205 case 0x51: /* PUSH eCX */
20206 case 0x52: /* PUSH eDX */
20207 case 0x53: /* PUSH eBX */
20208 case 0x55: /* PUSH eBP */
20209 case 0x56: /* PUSH eSI */
20210 case 0x57: /* PUSH eDI */
20211 case 0x54: /* PUSH eSP */
20212 /* This is the Right Way, in that the value to be pushed is
20213 established before %rsp is changed, so that pushq %rsp
20214 correctly pushes the old value. */
20215 if (haveF2orF3(pfx
)) goto decode_failure
;
20216 vassert(sz
== 2 || sz
== 4 || sz
== 8);
20218 sz
= 8; /* there is no encoding for 32-bit push in 64-bit mode */
20219 ty
= sz
==2 ? Ity_I16
: Ity_I64
;
20221 t2
= newTemp(Ity_I64
);
20222 assign(t1
, getIRegRexB(sz
, pfx
, opc
-0x50));
20223 assign(t2
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(sz
)));
20224 putIReg64(R_RSP
, mkexpr(t2
) );
20225 storeLE(mkexpr(t2
),mkexpr(t1
));
20226 DIP("push%c %s\n", nameISize(sz
), nameIRegRexB(sz
,pfx
,opc
-0x50));
20229 case 0x58: /* POP eAX */
20230 case 0x59: /* POP eCX */
20231 case 0x5A: /* POP eDX */
20232 case 0x5B: /* POP eBX */
20233 case 0x5D: /* POP eBP */
20234 case 0x5E: /* POP eSI */
20235 case 0x5F: /* POP eDI */
20236 case 0x5C: /* POP eSP */
20237 if (haveF2orF3(pfx
)) goto decode_failure
;
20238 vassert(sz
== 2 || sz
== 4 || sz
== 8);
20240 sz
= 8; /* there is no encoding for 32-bit pop in 64-bit mode */
20241 t1
= newTemp(szToITy(sz
));
20242 t2
= newTemp(Ity_I64
);
20243 assign(t2
, getIReg64(R_RSP
));
20244 assign(t1
, loadLE(szToITy(sz
),mkexpr(t2
)));
20245 putIReg64(R_RSP
, binop(Iop_Add64
, mkexpr(t2
), mkU64(sz
)));
20246 putIRegRexB(sz
, pfx
, opc
-0x58, mkexpr(t1
));
20247 DIP("pop%c %s\n", nameISize(sz
), nameIRegRexB(sz
,pfx
,opc
-0x58));
20250 case 0x63: /* MOVSX */
20251 if (haveF2orF3(pfx
)) goto decode_failure
;
20252 if (haveREX(pfx
) && 1==getRexW(pfx
)) {
20254 /* movsx r/m32 to r64 */
20255 modrm
= getUChar(delta
);
20256 if (epartIsReg(modrm
)) {
20258 putIRegG(8, pfx
, modrm
,
20260 getIRegE(4, pfx
, modrm
)));
20261 DIP("movslq %s,%s\n",
20262 nameIRegE(4, pfx
, modrm
),
20263 nameIRegG(8, pfx
, modrm
));
20266 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
20268 putIRegG(8, pfx
, modrm
,
20270 loadLE(Ity_I32
, mkexpr(addr
))));
20271 DIP("movslq %s,%s\n", dis_buf
,
20272 nameIRegG(8, pfx
, modrm
));
20276 goto decode_failure
;
20279 case 0x68: /* PUSH Iv */
20280 if (haveF2orF3(pfx
)) goto decode_failure
;
20281 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
20282 if (sz
== 4) sz
= 8;
20283 d64
= getSDisp(imin(4,sz
),delta
);
20284 delta
+= imin(4,sz
);
20287 case 0x69: /* IMUL Iv, Ev, Gv */
20288 if (haveF2orF3(pfx
)) goto decode_failure
;
20289 delta
= dis_imul_I_E_G ( vbi
, pfx
, sz
, delta
, sz
);
20292 case 0x6A: /* PUSH Ib, sign-extended to sz */
20293 if (haveF2orF3(pfx
)) goto decode_failure
;
20294 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
20295 if (sz
== 4) sz
= 8;
20296 d64
= getSDisp8(delta
); delta
+= 1;
20300 t1
= newTemp(Ity_I64
);
20302 assign( t1
, binop(Iop_Sub64
,getIReg64(R_RSP
),mkU64(sz
)) );
20303 putIReg64(R_RSP
, mkexpr(t1
) );
20304 /* stop mkU16 asserting if d32 is a negative 16-bit number
20308 storeLE( mkexpr(t1
), mkU(ty
,d64
) );
20309 DIP("push%c $%lld\n", nameISize(sz
), (Long
)d64
);
20312 case 0x6B: /* IMUL Ib, Ev, Gv */
20313 delta
= dis_imul_I_E_G ( vbi
, pfx
, sz
, delta
, 1 );
20318 case 0x72: /* JBb/JNAEb (jump below) */
20319 case 0x73: /* JNBb/JAEb (jump not below) */
20320 case 0x74: /* JZb/JEb (jump zero) */
20321 case 0x75: /* JNZb/JNEb (jump not zero) */
20322 case 0x76: /* JBEb/JNAb (jump below or equal) */
20323 case 0x77: /* JNBEb/JAb (jump not below or equal) */
20324 case 0x78: /* JSb (jump negative) */
20325 case 0x79: /* JSb (jump not negative) */
20326 case 0x7A: /* JP (jump parity even) */
20327 case 0x7B: /* JNP/JPO (jump parity odd) */
20328 case 0x7C: /* JLb/JNGEb (jump less) */
20329 case 0x7D: /* JGEb/JNLb (jump greater or equal) */
20330 case 0x7E: /* JLEb/JNGb (jump less or equal) */
20331 case 0x7F: { /* JGb/JNLEb (jump greater) */
20333 const HChar
* comment
= "";
20334 if (haveF3(pfx
)) goto decode_failure
;
20335 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
20336 jmpDelta
= getSDisp8(delta
);
20337 vassert(-128 <= jmpDelta
&& jmpDelta
< 128);
20338 d64
= (guest_RIP_bbstart
+delta
+1) + jmpDelta
;
20340 /* End the block at this point. */
20341 jcc_01( dres
, (AMD64Condcode
)(opc
- 0x70),
20342 guest_RIP_bbstart
+delta
, d64
);
20343 vassert(dres
->whatNext
== Dis_StopHere
);
20344 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc
- 0x70), (ULong
)d64
,
20349 case 0x80: /* Grp1 Ib,Eb */
20350 modrm
= getUChar(delta
);
20351 /* Disallow F2/XACQ and F3/XREL for the non-mem case. Allow
20352 just one for the mem case and also require LOCK in this case.
20353 Note that this erroneously allows XACQ/XREL on CMP since we
20354 don't check the subopcode here. No big deal. */
20355 if (epartIsReg(modrm
) && haveF2orF3(pfx
))
20356 goto decode_failure
;
20357 if (!epartIsReg(modrm
) && haveF2andF3(pfx
))
20358 goto decode_failure
;
20359 if (!epartIsReg(modrm
) && haveF2orF3(pfx
) && !haveLOCK(pfx
))
20360 goto decode_failure
;
20361 am_sz
= lengthAMode(pfx
,delta
);
20364 d64
= getSDisp8(delta
+ am_sz
);
20365 delta
= dis_Grp1 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
, d64
);
   case 0x81: /* Grp1 Iv,Ev */
      modrm = getUChar(delta);
      /* Same comment as for case 0x80 just above. */
      if (epartIsReg(modrm) && haveF2orF3(pfx))
         goto decode_failure;
      if (!epartIsReg(modrm) && haveF2andF3(pfx))
         goto decode_failure;
      if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx))
         goto decode_failure;
      am_sz = lengthAMode(pfx,delta);
      d_sz  = imin(sz,4);
      d64   = getSDisp(d_sz, delta + am_sz);
      delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
      return delta;
   case 0x83: /* Grp1 Ib,Ev */
      if (haveF2orF3(pfx)) goto decode_failure;
      modrm = getUChar(delta);
      am_sz = lengthAMode(pfx,delta);
      d_sz  = 1;
      d64   = getSDisp8(delta + am_sz);
      delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
      return delta;
   case 0x84: /* TEST Eb,Gb */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, False,
                            1, delta, "test" );
      return delta;

   case 0x85: /* TEST Ev,Gv */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, False,
                            sz, delta, "test" );
      return delta;
   /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
      prefix.  Therefore, generate CAS regardless of the presence or
      otherwise of a LOCK prefix. */
   case 0x86: /* XCHG Gb,Eb */
      sz = 1;
      /* Fall through ... */
   case 0x87: /* XCHG Gv,Ev */
      modrm = getUChar(delta);
      /* Check whether F2 or F3 are allowable.  For the mem case, one
         or the other but not both are.  We don't care about the
         presence of LOCK in this case -- XCHG is unusual in this
         respect. */
      if (haveF2orF3(pfx)) {
         if (epartIsReg(modrm)) {
            goto decode_failure;
         } else {
            if (haveF2andF3(pfx))
               goto decode_failure;
         }
      }
      ty = szToITy(sz);
      t1 = newTemp(ty); t2 = newTemp(ty);
      if (epartIsReg(modrm)) {
         assign(t1, getIRegE(sz, pfx, modrm));
         assign(t2, getIRegG(sz, pfx, modrm));
         putIRegG(sz, pfx, modrm, mkexpr(t1));
         putIRegE(sz, pfx, modrm, mkexpr(t2));
         delta++;
         DIP("xchg%c %s, %s\n",
             nameISize(sz), nameIRegG(sz, pfx, modrm),
             nameIRegE(sz, pfx, modrm));
      } else {
         *expect_CAS = True;
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( t1, loadLE(ty, mkexpr(addr)) );
         assign( t2, getIRegG(sz, pfx, modrm) );
         casLE( mkexpr(addr),
                mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
         putIRegG( sz, pfx, modrm, mkexpr(t1) );
         delta += alen;
         DIP("xchg%c %s, %s\n", nameISize(sz),
             nameIRegG(sz, pfx, modrm), dis_buf);
      }
      return delta;
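   /* Exposition-only sketch (not decoder logic): in informal flat-IR
      notation, the memory form of XCHG above expands to roughly

         t_old = LDle:ty(addr)            # current memory value
         t_new = GET:ty(reg_G)            # register value
         CASle(addr :: t_old -> t_new)    # atomic swap-in of t_new
         PUT(reg_G) = t_old               # old memory value to register

      The CAS "expects" the value just loaded, so it can only fail if
      another thread stored to addr in between; casLE is given
      guest_RIP_curr_instr so that, on failure, the whole instruction
      is simply re-executed.  The opcode names above are schematic. */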
   case 0x88: { /* MOV Gb,Eb */
      /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
      Bool ok = True;
      delta = dis_mov_G_E(vbi, pfx, 1, delta, &ok);
      if (!ok) goto decode_failure;
      return delta;
   }

   case 0x89: { /* MOV Gv,Ev */
      /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
      Bool ok = True;
      delta = dis_mov_G_E(vbi, pfx, sz, delta, &ok);
      if (!ok) goto decode_failure;
      return delta;
   }

   case 0x8A: /* MOV Eb,Gb */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_mov_E_G(vbi, pfx, 1, delta);
      return delta;

   case 0x8B: /* MOV Ev,Gv */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_mov_E_G(vbi, pfx, sz, delta);
      return delta;

   case 0x8C: /* MOV S,E -- MOV from a SEGMENT REGISTER */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_mov_S_E(vbi, pfx, sz, delta);
      return delta;
   case 0x8D: /* LEA M,Gv */
      if (haveF2orF3(pfx)) goto decode_failure;
      if (sz != 4 && sz != 8)
         goto decode_failure;
      modrm = getUChar(delta);
      if (epartIsReg(modrm))
         goto decode_failure;
      /* NOTE!  this is the one place where a segment override prefix
         has no effect on the address calculation.  Therefore we clear
         any segment override bits in pfx. */
      addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 );
      delta += alen;
      /* This is a hack.  But it isn't clear that doing the calculation
         at 32 bits is really worth it.  Hence for leal, do the full
         64-bit calculation and then truncate it. */
      putIRegG( sz, pfx, modrm,
                sz == 4
                   ? unop(Iop_64to32, mkexpr(addr))
                   : mkexpr(addr) );
      DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
          nameIRegG(sz,pfx,modrm));
      return delta;
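   /* Worked example (exposition only): for "leal 4(%rbx,%rcx,8), %eax"
      the effective address is computed at full 64-bit width and then
      narrowed, roughly

         PUT(eax) = 64to32( Add64( GET(rbx),
                                   Add64( Shl64(GET(rcx), 3), 4 ) ) )

      The low 32 bits of a 64-bit add/shift chain equal the result of
      doing the same chain at 32 bits, so truncating afterwards is a
      safe way to implement the 32-bit form.  The operand shape shown
      is schematic, not the exact tree disAMode builds. */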
   case 0x8F: { /* POPQ m64 / POPW m16 */
      Int   len;
      UChar rm;
      /* There is no encoding for 32-bit pop in 64-bit mode.
         So sz==4 actually means sz==8. */
      if (haveF2orF3(pfx)) goto decode_failure;
      vassert(sz == 2 || sz == 4
              || /* tolerate redundant REX.W, see #210481 */ sz == 8);
      if (sz == 4) sz = 8;
      if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists

      rm = getUChar(delta);

      /* make sure this instruction is correct POP */
      if (epartIsReg(rm) || gregLO3ofRM(rm) != 0)
         goto decode_failure;
      /* and has correct size */
      vassert(sz == 8);

      t1 = newTemp(Ity_I64);
      t3 = newTemp(Ity_I64);
      assign( t1, getIReg64(R_RSP) );
      assign( t3, loadLE(Ity_I64, mkexpr(t1)) );

      /* Increase RSP; must be done before the STORE.  Intel manual
         says: If the RSP register is used as a base register for
         addressing a destination operand in memory, the POP
         instruction computes the effective address of the operand
         after it increments the RSP register. */
      putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) );

      addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
      storeLE( mkexpr(addr), mkexpr(t3) );
      delta += len;

      DIP("popl %s\n", dis_buf);
      return delta;
   }
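   /* Worked example (exposition only): for "popq (%rsp)" the code above
      first loads the old top-of-stack, then bumps RSP by 8, and only
      then evaluates the destination address, so the store lands at the
      *incremented* RSP -- exactly the ordering required by the Intel
      manual text quoted above.  In effect:

         tmp  = *old_rsp;
         rsp  = old_rsp + 8;
         *rsp = tmp;                                                   */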
20544 case 0x90: /* XCHG eAX,eAX */
20545 /* detect and handle F3 90 (rep nop) specially */
20546 if (!have66(pfx
) && !haveF2(pfx
) && haveF3(pfx
)) {
20547 DIP("rep nop (P4 pause)\n");
20548 /* "observe" the hint. The Vex client needs to be careful not
20549 to cause very long delays as a result, though. */
20550 jmp_lit(dres
, Ijk_Yield
, guest_RIP_bbstart
+delta
);
20551 vassert(dres
->whatNext
== Dis_StopHere
);
20554 /* detect and handle NOPs specially */
20555 if (/* F2/F3 probably change meaning completely */
20557 /* If REX.B is 1, we're not exchanging rAX with itself */
20558 && getRexB(pfx
)==0 ) {
20562 /* else fall through to normal case. */
20563 case 0x91: /* XCHG rAX,rCX */
20564 case 0x92: /* XCHG rAX,rDX */
20565 case 0x93: /* XCHG rAX,rBX */
20566 case 0x94: /* XCHG rAX,rSP */
20567 case 0x95: /* XCHG rAX,rBP */
20568 case 0x96: /* XCHG rAX,rSI */
20569 case 0x97: /* XCHG rAX,rDI */
20570 /* guard against mutancy */
20571 if (haveF2orF3(pfx
)) goto decode_failure
;
20572 codegen_xchg_rAX_Reg ( pfx
, sz
, opc
- 0x90 );
20575 case 0x98: /* CBW */
20576 if (haveF2orF3(pfx
)) goto decode_failure
;
20578 putIRegRAX( 8, unop(Iop_32Sto64
, getIRegRAX(4)) );
20579 DIP(/*"cdqe\n"*/"cltq\n");
20583 putIRegRAX( 4, unop(Iop_16Sto32
, getIRegRAX(2)) );
20588 putIRegRAX( 2, unop(Iop_8Sto16
, getIRegRAX(1)) );
20592 goto decode_failure
;
20594 case 0x99: /* CWD/CDQ/CQO */
20595 if (haveF2orF3(pfx
)) goto decode_failure
;
20596 vassert(sz
== 2 || sz
== 4 || sz
== 8);
20599 binop(mkSizedOp(ty
,Iop_Sar8
),
20601 mkU8(sz
== 2 ? 15 : (sz
== 4 ? 31 : 63))) );
20602 DIP(sz
== 2 ? "cwd\n"
20603 : (sz
== 4 ? /*"cdq\n"*/ "cltd\n"
20607 case 0x9B: /* FWAIT (X87 insn) */
20612 case 0x9C: /* PUSHF */ {
20613 /* Note. There is no encoding for a 32-bit pushf in 64-bit
20614 mode. So sz==4 actually means sz==8. */
20615 /* 24 July 06: has also been seen with a redundant REX prefix,
20616 so must also allow sz==8. */
20617 if (haveF2orF3(pfx
)) goto decode_failure
;
20618 vassert(sz
== 2 || sz
== 4 || sz
== 8);
20619 if (sz
== 4) sz
= 8;
20620 if (sz
!= 8) goto decode_failure
; // until we know a sz==2 test case exists
20622 t1
= newTemp(Ity_I64
);
20623 assign( t1
, binop(Iop_Sub64
,getIReg64(R_RSP
),mkU64(sz
)) );
20624 putIReg64(R_RSP
, mkexpr(t1
) );
20626 t2
= newTemp(Ity_I64
);
20627 assign( t2
, mk_amd64g_calculate_rflags_all() );
20629 /* Patch in the D flag. This can simply be a copy of bit 10 of
20630 baseBlock[OFFB_DFLAG]. */
20631 t3
= newTemp(Ity_I64
);
20632 assign( t3
, binop(Iop_Or64
,
20635 IRExpr_Get(OFFB_DFLAG
,Ity_I64
),
20639 /* And patch in the ID flag. */
20640 t4
= newTemp(Ity_I64
);
20641 assign( t4
, binop(Iop_Or64
,
20644 binop(Iop_Shl64
, IRExpr_Get(OFFB_IDFLAG
,Ity_I64
),
20649 /* And patch in the AC flag too. */
20650 t5
= newTemp(Ity_I64
);
20651 assign( t5
, binop(Iop_Or64
,
20654 binop(Iop_Shl64
, IRExpr_Get(OFFB_ACFLAG
,Ity_I64
),
20659 /* if sz==2, the stored value needs to be narrowed. */
20661 storeLE( mkexpr(t1
), unop(Iop_32to16
,
20662 unop(Iop_64to32
,mkexpr(t5
))) );
20664 storeLE( mkexpr(t1
), mkexpr(t5
) );
20666 DIP("pushf%c\n", nameISize(sz
));
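   /* Exposition-only summary of the flag-word assembly above: the
      OSZACP bits come from the flags thunk, while D, ID and AC live in
      separate guest-state fields and are OR'd in at their architectural
      bit positions, roughly

         rflags = calculate_rflags_all()
                | ( GET(OFFB_DFLAG)  & (1 << 10) )   # DFLAG is +1/-1,
                                                     # so bit 10 is right
                | ( GET(OFFB_IDFLAG) << 21 )
                | ( GET(OFFB_ACFLAG) << 18 )

      and the result is narrowed to 16 bits for the pushfw form. */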
20670 case 0x9D: /* POPF */
20671 /* Note. There is no encoding for a 32-bit popf in 64-bit mode.
20672 So sz==4 actually means sz==8. */
20673 if (haveF2orF3(pfx
)) goto decode_failure
;
20674 vassert(sz
== 2 || sz
== 4 || sz
== 8);
20675 if (sz
== 4) sz
= 8;
20676 if (sz
!= 8) goto decode_failure
; // until we know a sz==2 test case exists
20677 t1
= newTemp(Ity_I64
); t2
= newTemp(Ity_I64
);
20678 assign(t2
, getIReg64(R_RSP
));
20679 assign(t1
, widenUto64(loadLE(szToITy(sz
),mkexpr(t2
))));
20680 putIReg64(R_RSP
, binop(Iop_Add64
, mkexpr(t2
), mkU64(sz
)));
20681 /* t1 is the flag word. Mask out everything except OSZACP and
20682 set the flags thunk to AMD64G_CC_OP_COPY. */
20683 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
20684 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
20685 stmt( IRStmt_Put( OFFB_CC_DEP1
,
20688 mkU64( AMD64G_CC_MASK_C
| AMD64G_CC_MASK_P
20689 | AMD64G_CC_MASK_A
| AMD64G_CC_MASK_Z
20690 | AMD64G_CC_MASK_S
| AMD64G_CC_MASK_O
)
20694 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
20696 /* Also need to set the D flag, which is held in bit 10 of t1.
20697 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
20703 binop(Iop_Shr64
, mkexpr(t1
), mkU8(10)),
20705 mkU64(0xFFFFFFFFFFFFFFFFULL
),
20709 /* And set the ID flag */
20715 binop(Iop_Shr64
, mkexpr(t1
), mkU8(21)),
20721 /* And set the AC flag too */
20727 binop(Iop_Shr64
, mkexpr(t1
), mkU8(18)),
20733 DIP("popf%c\n", nameISize(sz
));
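   /* Exposition-only note on the D/ID/AC recovery above: bit 10 of the
      popped word is the direction flag, and OFFB_DFLAG stores +1
      (forward) or -1 (backward), so the update amounts to

         DFLAG = ((popped >> 10) & 1) ? -1 : +1

      The ID (bit 21) and AC (bit 18) fields are extracted the same way
      but are kept as plain 0/1 values. */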
20736 case 0x9E: /* SAHF */
20741 case 0x9F: /* LAHF */
20746 case 0xA0: /* MOV Ob,AL */
20747 if (have66orF2orF3(pfx
)) goto decode_failure
;
20749 /* Fall through ... */
20750 case 0xA1: /* MOV Ov,eAX */
20751 if (sz
!= 8 && sz
!= 4 && sz
!= 2 && sz
!= 1)
20752 goto decode_failure
;
20753 d64
= getDisp64(delta
);
20756 addr
= newTemp(Ity_I64
);
20757 assign( addr
, handleAddrOverrides(vbi
, pfx
, mkU64(d64
)) );
20758 putIRegRAX(sz
, loadLE( ty
, mkexpr(addr
) ));
20759 DIP("mov%c %s0x%llx, %s\n", nameISize(sz
),
20760 segRegTxt(pfx
), (ULong
)d64
,
20764 case 0xA2: /* MOV AL,Ob */
20765 if (have66orF2orF3(pfx
)) goto decode_failure
;
20767 /* Fall through ... */
20768 case 0xA3: /* MOV eAX,Ov */
20769 if (sz
!= 8 && sz
!= 4 && sz
!= 2 && sz
!= 1)
20770 goto decode_failure
;
20771 d64
= getDisp64(delta
);
20774 addr
= newTemp(Ity_I64
);
20775 assign( addr
, handleAddrOverrides(vbi
, pfx
, mkU64(d64
)) );
20776 storeLE( mkexpr(addr
), getIRegRAX(sz
) );
20777 DIP("mov%c %s, %s0x%llx\n", nameISize(sz
), nameIRegRAX(sz
),
20778 segRegTxt(pfx
), (ULong
)d64
);
20783 /* F3 A4: rep movsb */
20784 if (haveF3(pfx
) && !haveF2(pfx
)) {
20787 dis_REP_op ( dres
, AMD64CondAlways
, dis_MOVS
, sz
,
20788 guest_RIP_curr_instr
,
20789 guest_RIP_bbstart
+delta
, "rep movs", pfx
);
20790 dres
->whatNext
= Dis_StopHere
;
20794 if (!haveF3(pfx
) && !haveF2(pfx
)) {
20797 dis_string_op( dis_MOVS
, sz
, "movs", pfx
);
20800 goto decode_failure
;
20804 /* F3 A6/A7: repe cmps/rep cmps{w,l,q} */
20805 if (haveF3(pfx
) && !haveF2(pfx
)) {
20808 dis_REP_op ( dres
, AMD64CondZ
, dis_CMPS
, sz
,
20809 guest_RIP_curr_instr
,
20810 guest_RIP_bbstart
+delta
, "repe cmps", pfx
);
20811 dres
->whatNext
= Dis_StopHere
;
20814 goto decode_failure
;
20818 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */
20819 if (haveF3(pfx
) && !haveF2(pfx
)) {
20822 dis_REP_op ( dres
, AMD64CondAlways
, dis_STOS
, sz
,
20823 guest_RIP_curr_instr
,
20824 guest_RIP_bbstart
+delta
, "rep stos", pfx
);
20825 vassert(dres
->whatNext
== Dis_StopHere
);
20828 /* AA/AB: stosb/stos{w,l,q} */
20829 if (!haveF3(pfx
) && !haveF2(pfx
)) {
20832 dis_string_op( dis_STOS
, sz
, "stos", pfx
);
20835 goto decode_failure
;
20837 case 0xA8: /* TEST Ib, AL */
20838 if (haveF2orF3(pfx
)) goto decode_failure
;
20839 delta
= dis_op_imm_A( 1, False
, Iop_And8
, False
, delta
, "test" );
20841 case 0xA9: /* TEST Iv, eAX */
20842 if (haveF2orF3(pfx
)) goto decode_failure
;
20843 delta
= dis_op_imm_A( sz
, False
, Iop_And8
, False
, delta
, "test" );
20846 case 0xAC: /* LODS, no REP prefix */
20848 dis_string_op( dis_LODS
, ( opc
== 0xAC ? 1 : sz
), "lods", pfx
);
20853 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */
20854 if (haveF2(pfx
) && !haveF3(pfx
)) {
20857 dis_REP_op ( dres
, AMD64CondNZ
, dis_SCAS
, sz
,
20858 guest_RIP_curr_instr
,
20859 guest_RIP_bbstart
+delta
, "repne scas", pfx
);
20860 vassert(dres
->whatNext
== Dis_StopHere
);
20863 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */
20864 if (!haveF2(pfx
) && haveF3(pfx
)) {
20867 dis_REP_op ( dres
, AMD64CondZ
, dis_SCAS
, sz
,
20868 guest_RIP_curr_instr
,
20869 guest_RIP_bbstart
+delta
, "repe scas", pfx
);
20870 vassert(dres
->whatNext
== Dis_StopHere
);
20873 /* AE/AF: scasb/scas{w,l,q} */
20874 if (!haveF2(pfx
) && !haveF3(pfx
)) {
20877 dis_string_op( dis_SCAS
, sz
, "scas", pfx
);
20880 goto decode_failure
;
20882 /* XXXX be careful here with moves to AH/BH/CH/DH */
20883 case 0xB0: /* MOV imm,AL */
20884 case 0xB1: /* MOV imm,CL */
20885 case 0xB2: /* MOV imm,DL */
20886 case 0xB3: /* MOV imm,BL */
20887 case 0xB4: /* MOV imm,AH */
20888 case 0xB5: /* MOV imm,CH */
20889 case 0xB6: /* MOV imm,DH */
20890 case 0xB7: /* MOV imm,BH */
20891 if (haveF2orF3(pfx
)) goto decode_failure
;
20892 d64
= getUChar(delta
);
20894 putIRegRexB(1, pfx
, opc
-0xB0, mkU8(d64
));
20895 DIP("movb $%lld,%s\n", d64
, nameIRegRexB(1,pfx
,opc
-0xB0));
20898 case 0xB8: /* MOV imm,eAX */
20899 case 0xB9: /* MOV imm,eCX */
20900 case 0xBA: /* MOV imm,eDX */
20901 case 0xBB: /* MOV imm,eBX */
20902 case 0xBC: /* MOV imm,eSP */
20903 case 0xBD: /* MOV imm,eBP */
20904 case 0xBE: /* MOV imm,eSI */
20905 case 0xBF: /* MOV imm,eDI */
20906 /* This is the one-and-only place where 64-bit literals are
20907 allowed in the instruction stream. */
20908 if (haveF2orF3(pfx
)) goto decode_failure
;
20910 d64
= getDisp64(delta
);
20912 putIRegRexB(8, pfx
, opc
-0xB8, mkU64(d64
));
20913 DIP("movabsq $%lld,%s\n", (Long
)d64
,
20914 nameIRegRexB(8,pfx
,opc
-0xB8));
20916 d64
= getSDisp(imin(4,sz
),delta
);
20917 delta
+= imin(4,sz
);
20918 putIRegRexB(sz
, pfx
, opc
-0xB8,
20919 mkU(szToITy(sz
), d64
& mkSizeMask(sz
)));
20920 DIP("mov%c $%lld,%s\n", nameISize(sz
),
20922 nameIRegRexB(sz
,pfx
,opc
-0xB8));
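   /* Encoding example (exposition only): "movabsq $0x1122334455667788,
      %r10" is 49 BA 88 77 66 55 44 33 22 11 -- REX.W+REX.B, opcode
      0xB8+2, then the 8-byte little-endian immediate.  Without REX.W
      the same opcodes take an immediate of at most 4 bytes, which is
      the non-8-byte path handled just above. */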
20926 case 0xC0: { /* Grp2 Ib,Eb */
20927 Bool decode_OK
= True
;
20928 if (haveF2orF3(pfx
)) goto decode_failure
;
20929 modrm
= getUChar(delta
);
20930 am_sz
= lengthAMode(pfx
,delta
);
20932 d64
= getUChar(delta
+ am_sz
);
20934 delta
= dis_Grp2 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
,
20935 mkU8(d64
& 0xFF), NULL
, &decode_OK
);
20936 if (!decode_OK
) goto decode_failure
;
20940 case 0xC1: { /* Grp2 Ib,Ev */
20941 Bool decode_OK
= True
;
20942 if (haveF2orF3(pfx
)) goto decode_failure
;
20943 modrm
= getUChar(delta
);
20944 am_sz
= lengthAMode(pfx
,delta
);
20946 d64
= getUChar(delta
+ am_sz
);
20947 delta
= dis_Grp2 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
,
20948 mkU8(d64
& 0xFF), NULL
, &decode_OK
);
20949 if (!decode_OK
) goto decode_failure
;
   case 0xC2: /* RET imm16 */
      if (have66orF3(pfx)) goto decode_failure;
      if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
      d64 = getUDisp16(delta);
      delta += 2;
      dis_ret(dres, vbi, d64);
      DIP("ret $%lld\n", d64);
      return delta;

   case 0xC3: /* RET */
      if (have66(pfx)) goto decode_failure;
      /* F3 is acceptable on AMD. */
      if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
      dis_ret(dres, vbi, 0);
      DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n");
      return delta;
20970 case 0xC6: /* C6 /0 = MOV Ib,Eb */
20972 goto maybe_do_Mov_I_E
;
20973 case 0xC7: /* C7 /0 = MOV Iv,Ev */
20974 goto maybe_do_Mov_I_E
;
20976 modrm
= getUChar(delta
);
20977 if (gregLO3ofRM(modrm
) == 0) {
20978 if (epartIsReg(modrm
)) {
20979 /* Neither F2 nor F3 are allowable. */
20980 if (haveF2orF3(pfx
)) goto decode_failure
;
20981 delta
++; /* mod/rm byte */
20982 d64
= getSDisp(imin(4,sz
),delta
);
20983 delta
+= imin(4,sz
);
20984 putIRegE(sz
, pfx
, modrm
,
20985 mkU(szToITy(sz
), d64
& mkSizeMask(sz
)));
20986 DIP("mov%c $%lld, %s\n", nameISize(sz
),
20988 nameIRegE(sz
,pfx
,modrm
));
20990 if (haveF2(pfx
)) goto decode_failure
;
20991 /* F3(XRELEASE) is allowable here */
20992 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
,
20993 /*xtra*/imin(4,sz
) );
20995 d64
= getSDisp(imin(4,sz
),delta
);
20996 delta
+= imin(4,sz
);
20997 storeLE(mkexpr(addr
),
20998 mkU(szToITy(sz
), d64
& mkSizeMask(sz
)));
20999 DIP("mov%c $%lld, %s\n", nameISize(sz
), (Long
)d64
, dis_buf
);
21003 /* BEGIN HACKY SUPPORT FOR xbegin */
21004 if (opc
== 0xC7 && modrm
== 0xF8 && !have66orF2orF3(pfx
) && sz
== 4
21005 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
21006 delta
++; /* mod/rm byte */
21007 d64
= getSDisp(4,delta
);
21009 guest_RIP_next_mustcheck
= True
;
21010 guest_RIP_next_assumed
= guest_RIP_bbstart
+ delta
;
21011 Addr64 failAddr
= guest_RIP_bbstart
+ delta
+ d64
;
21012 /* EAX contains the failure status code. Bit 3 is "Set if an
21013 internal buffer overflowed", which seems like the
21014 least-bogus choice we can make here. */
21015 putIRegRAX(4, mkU32(1<<3));
21016 /* And jump to the fail address. */
21017 jmp_lit(dres
, Ijk_Boring
, failAddr
);
21018 vassert(dres
->whatNext
== Dis_StopHere
);
21019 DIP("xbeginq 0x%llx\n", failAddr
);
21022 /* END HACKY SUPPORT FOR xbegin */
21023 /* BEGIN HACKY SUPPORT FOR xabort */
21024 if (opc
== 0xC6 && modrm
== 0xF8 && !have66orF2orF3(pfx
) && sz
== 1
21025 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
21026 delta
++; /* mod/rm byte */
21027 abyte
= getUChar(delta
); delta
++;
21028 /* There is never a real transaction in progress, so do nothing. */
21029 DIP("xabort $%d", (Int
)abyte
);
21032 /* END HACKY SUPPORT FOR xabort */
21033 goto decode_failure
;
21035 case 0xC8: /* ENTER */
21036 /* Same comments re operand size as for LEAVE below apply.
21037 Also, only handles the case "enter $imm16, $0"; other cases
21038 for the second operand (nesting depth) are not handled. */
21040 goto decode_failure
;
21041 d64
= getUDisp16(delta
);
21043 vassert(d64
>= 0 && d64
<= 0xFFFF);
21044 if (getUChar(delta
) != 0)
21045 goto decode_failure
;
21047 /* Intel docs seem to suggest:
21053 t1
= newTemp(Ity_I64
);
21054 assign(t1
, getIReg64(R_RBP
));
21055 t2
= newTemp(Ity_I64
);
21056 assign(t2
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(8)));
21057 putIReg64(R_RSP
, mkexpr(t2
));
21058 storeLE(mkexpr(t2
), mkexpr(t1
));
21059 putIReg64(R_RBP
, mkexpr(t2
));
21061 putIReg64(R_RSP
, binop(Iop_Sub64
, mkexpr(t2
), mkU64(d64
)));
21063 DIP("enter $%u, $0\n", (UInt
)d64
);
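   /* For reference (exposition only): the "enter $imm16, $0" handled
      above behaves like

         pushq %rbp
         movq  %rsp, %rbp
         subq  $imm16, %rsp

      The general form with a nonzero nesting level also copies earlier
      frame pointers; that form is rejected (decode_failure) above. */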
   case 0xC9: /* LEAVE */
      /* In 64-bit mode this defaults to a 64-bit operand size.  There
         is no way to encode a 32-bit variant.  Hence sz==4 but we do
         it at 8. */
      if (sz != 4)
         goto decode_failure;
      t1 = newTemp(Ity_I64);
      t2 = newTemp(Ity_I64);
      assign(t1, getIReg64(R_RBP));
      /* First PUT RSP looks redundant, but need it because RSP must
         always be up-to-date for Memcheck to work... */
      putIReg64(R_RSP, mkexpr(t1));
      assign(t2, loadLE(Ity_I64,mkexpr(t1)));
      putIReg64(R_RBP, mkexpr(t2));
      putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) );
      DIP("leave\n");
      return delta;

   case 0xCC: /* INT 3 */
      jmp_lit(dres, Ijk_SigTRAP, guest_RIP_bbstart + delta);
      vassert(dres->whatNext == Dis_StopHere);
      DIP("int $0x3\n");
      return delta;

   case 0xCD: /* INT imm8 */
      d64 = getUChar(delta); delta++;

      /* Handle int $0xD2 (Solaris fasttrap syscalls). */
      if (d64 == 0xD2) {
         jmp_lit(dres, Ijk_Sys_int210, guest_RIP_bbstart + delta);
         vassert(dres->whatNext == Dis_StopHere);
         DIP("int $0xD2\n");
         return delta;
      }
      goto decode_failure;
21102 case 0xCF: /* IRET */
21103 /* Note, this is an extremely kludgey and limited implementation of iret
21104 based on the extremely kludgey and limited implementation of iret for x86
21105 popq %RIP; popl %CS; popq %RFLAGS; popq %RSP; popl %SS
21106 %CS and %SS are ignored */
21107 if (sz
!= 8 || have66orF2orF3(pfx
)) goto decode_failure
;
21109 t1
= newTemp(Ity_I64
); /* RSP */
21110 t2
= newTemp(Ity_I64
); /* new RIP */
21111 /* t3 = newTemp(Ity_I32); new CS */
21112 t4
= newTemp(Ity_I64
); /* new RFLAGS */
21113 t5
= newTemp(Ity_I64
); /* new RSP */
21114 /* t6 = newTemp(Ity_I32); new SS */
21116 assign(t1
, getIReg64(R_RSP
));
21117 assign(t2
, loadLE(Ity_I64
, binop(Iop_Add64
,mkexpr(t1
),mkU64(0))));
21118 /* assign(t3, loadLE(Ity_I32, binop(Iop_Add64,mkexpr(t1),mkU64(8)))); */
21119 assign(t4
, loadLE(Ity_I64
, binop(Iop_Add64
,mkexpr(t1
),mkU64(16))));
21120 assign(t5
, loadLE(Ity_I64
, binop(Iop_Add64
,mkexpr(t1
),mkU64(24))));
21121 /* assign(t6, loadLE(Ity_I32, binop(Iop_Add64,mkexpr(t1),mkU64(32)))); */
21124 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
21125 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
21126 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
21127 stmt( IRStmt_Put( OFFB_CC_DEP1
,
21130 mkU64( AMD64G_CC_MASK_C
| AMD64G_CC_MASK_P
21131 | AMD64G_CC_MASK_A
| AMD64G_CC_MASK_Z
21132 | AMD64G_CC_MASK_S
| AMD64G_CC_MASK_O
)
21137 /* Also need to set the D flag, which is held in bit 10 of t4.
21138 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
21144 binop(Iop_Shr64
, mkexpr(t4
), mkU8(10)),
21146 mkU64(0xFFFFFFFFFFFFFFFFULL
),
21150 /* And set the ID flag */
21156 binop(Iop_Shr64
, mkexpr(t4
), mkU8(21)),
21162 /* And set the AC flag too */
21168 binop(Iop_Shr64
, mkexpr(t4
), mkU8(18)),
21175 /* set new stack */
21176 putIReg64(R_RSP
, mkexpr(t5
));
21178 /* goto new RIP value */
21179 jmp_treg(dres
, Ijk_Ret
, t2
);
21180 DIP("iret (very kludgey)\n");
21183 case 0xD0: { /* Grp2 1,Eb */
21184 Bool decode_OK
= True
;
21185 if (haveF2orF3(pfx
)) goto decode_failure
;
21186 modrm
= getUChar(delta
);
21187 am_sz
= lengthAMode(pfx
,delta
);
21191 delta
= dis_Grp2 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
,
21192 mkU8(d64
), NULL
, &decode_OK
);
21193 if (!decode_OK
) goto decode_failure
;
21197 case 0xD1: { /* Grp2 1,Ev */
21198 Bool decode_OK
= True
;
21199 if (haveF2orF3(pfx
)) goto decode_failure
;
21200 modrm
= getUChar(delta
);
21201 am_sz
= lengthAMode(pfx
,delta
);
21204 delta
= dis_Grp2 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
,
21205 mkU8(d64
), NULL
, &decode_OK
);
21206 if (!decode_OK
) goto decode_failure
;
21210 case 0xD2: { /* Grp2 CL,Eb */
21211 Bool decode_OK
= True
;
21212 if (haveF2orF3(pfx
)) goto decode_failure
;
21213 modrm
= getUChar(delta
);
21214 am_sz
= lengthAMode(pfx
,delta
);
21217 delta
= dis_Grp2 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
,
21218 getIRegCL(), "%cl", &decode_OK
);
21219 if (!decode_OK
) goto decode_failure
;
21223 case 0xD3: { /* Grp2 CL,Ev */
21224 Bool decode_OK
= True
;
21225 if (haveF2orF3(pfx
)) goto decode_failure
;
21226 modrm
= getUChar(delta
);
21227 am_sz
= lengthAMode(pfx
,delta
);
21229 delta
= dis_Grp2 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
,
21230 getIRegCL(), "%cl", &decode_OK
);
21231 if (!decode_OK
) goto decode_failure
;
21235 case 0xD8: /* X87 instructions */
21243 Bool redundantREXWok
= False
;
21245 if (haveF2orF3(pfx
))
21246 goto decode_failure
;
21248 /* kludge to tolerate redundant rex.w prefixes (should do this
21249 properly one day) */
21250 /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */
21251 if ( (opc
== 0xD9 && getUChar(delta
+0) == 0xFA)/*fsqrt*/ )
21252 redundantREXWok
= True
;
21254 Bool size_OK
= False
;
21257 else if ( sz
== 8 )
21258 size_OK
= redundantREXWok
;
21259 else if ( sz
== 2 ) {
21260 int mod_rm
= getUChar(delta
+0);
21261 int reg
= gregLO3ofRM(mod_rm
);
21262 /* The HotSpot JVM uses these */
21263 if ( (opc
== 0xDD) && (reg
== 0 /* FLDL */ ||
21264 reg
== 4 /* FNSAVE */ ||
21265 reg
== 6 /* FRSTOR */ ) )
21268 /* AMD manual says 0x66 size override is ignored, except where
21269 it is meaningful */
21271 goto decode_failure
;
21273 Bool decode_OK
= False
;
21274 delta
= dis_FPU ( &decode_OK
, vbi
, pfx
, delta
);
21276 goto decode_failure
;
21281 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
21282 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
21283 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
21284 { /* The docs say this uses rCX as a count depending on the
21285 address size override, not the operand one. */
21286 IRExpr
* zbit
= NULL
;
21287 IRExpr
* count
= NULL
;
21288 IRExpr
* cond
= NULL
;
21289 const HChar
* xtra
= NULL
;
21291 if (have66orF2orF3(pfx
) || 1==getRexW(pfx
)) goto decode_failure
;
21292 /* So at this point we've rejected any variants which appear to
21293 be governed by the usual operand-size modifiers. Hence only
21294 the address size prefix can have an effect. It changes the
21295 size from 64 (default) to 32. */
21296 d64
= guest_RIP_bbstart
+delta
+1 + getSDisp8(delta
);
21298 if (haveASO(pfx
)) {
21299 /* 64to32 of 64-bit get is merely a get-put improvement
21301 putIReg32(R_RCX
, binop(Iop_Sub32
,
21302 unop(Iop_64to32
, getIReg64(R_RCX
)),
21305 putIReg64(R_RCX
, binop(Iop_Sub64
, getIReg64(R_RCX
), mkU64(1)));
21308 /* This is correct, both for 32- and 64-bit versions. If we're
21309 doing a 32-bit dec and the result is zero then the default
21310 zero extension rule will cause the upper 32 bits to be zero
21311 too. Hence a 64-bit check against zero is OK. */
21312 count
= getIReg64(R_RCX
);
21313 cond
= binop(Iop_CmpNE64
, count
, mkU64(0));
21320 zbit
= mk_amd64g_calculate_condition( AMD64CondZ
);
21321 cond
= mkAnd1(cond
, zbit
);
21325 zbit
= mk_amd64g_calculate_condition( AMD64CondNZ
);
21326 cond
= mkAnd1(cond
, zbit
);
21331 stmt( IRStmt_Exit(cond
, Ijk_Boring
, IRConst_U64(d64
), OFFB_RIP
) );
21333 DIP("loop%s%s 0x%llx\n", xtra
, haveASO(pfx
) ? "l" : "", (ULong
)d64
);
21338 /* JRCXZ or JECXZ, depending address size override. */
21339 if (have66orF2orF3(pfx
)) goto decode_failure
;
21340 d64
= (guest_RIP_bbstart
+delta
+1) + getSDisp8(delta
);
21342 if (haveASO(pfx
)) {
21344 stmt( IRStmt_Exit( binop(Iop_CmpEQ64
,
21345 unop(Iop_32Uto64
, getIReg32(R_RCX
)),
21351 DIP("jecxz 0x%llx\n", (ULong
)d64
);
21354 stmt( IRStmt_Exit( binop(Iop_CmpEQ64
,
21361 DIP("jrcxz 0x%llx\n", (ULong
)d64
);
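   /* Exposition-only summary of the two families handled above:

         loop/loope/loopne:  rcx = rcx - 1; branch if rcx != 0
                             (loope additionally requires ZF==1,
                              loopne requires ZF==0)
         jrcxz/jecxz:        branch if the count register is 0,
                             without modifying it

      In both, a 67 address-size prefix switches the counter from RCX
      to ECX; operand-size prefixes are rejected. */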
21365 case 0xE4: /* IN imm8, AL */
21367 t1
= newTemp(Ity_I64
);
21368 abyte
= getUChar(delta
); delta
++;
21369 assign(t1
, mkU64( abyte
& 0xFF ));
21370 DIP("in%c $%d,%s\n", nameISize(sz
), (Int
)abyte
, nameIRegRAX(sz
));
21372 case 0xE5: /* IN imm8, eAX */
21373 if (!(sz
== 2 || sz
== 4)) goto decode_failure
;
21374 t1
= newTemp(Ity_I64
);
21375 abyte
= getUChar(delta
); delta
++;
21376 assign(t1
, mkU64( abyte
& 0xFF ));
21377 DIP("in%c $%d,%s\n", nameISize(sz
), (Int
)abyte
, nameIRegRAX(sz
));
21379 case 0xEC: /* IN %DX, AL */
21381 t1
= newTemp(Ity_I64
);
21382 assign(t1
, unop(Iop_16Uto64
, getIRegRDX(2)));
21383 DIP("in%c %s,%s\n", nameISize(sz
), nameIRegRDX(2),
21386 case 0xED: /* IN %DX, eAX */
21387 if (!(sz
== 2 || sz
== 4)) goto decode_failure
;
21388 t1
= newTemp(Ity_I64
);
21389 assign(t1
, unop(Iop_16Uto64
, getIRegRDX(2)));
21390 DIP("in%c %s,%s\n", nameISize(sz
), nameIRegRDX(2),
21394 /* At this point, sz indicates the width, and t1 is a 64-bit
21395 value giving port number. */
21397 if (haveF2orF3(pfx
)) goto decode_failure
;
21398 vassert(sz
== 1 || sz
== 2 || sz
== 4);
21400 t2
= newTemp(Ity_I64
);
21401 d
= unsafeIRDirty_1_N(
21404 "amd64g_dirtyhelper_IN",
21405 &amd64g_dirtyhelper_IN
,
21406 mkIRExprVec_2( mkexpr(t1
), mkU64(sz
) )
21408 /* do the call, dumping the result in t2. */
21409 stmt( IRStmt_Dirty(d
) );
21410 putIRegRAX(sz
, narrowTo( ty
, mkexpr(t2
) ) );
21414 case 0xE6: /* OUT AL, imm8 */
21416 t1
= newTemp(Ity_I64
);
21417 abyte
= getUChar(delta
); delta
++;
21418 assign( t1
, mkU64( abyte
& 0xFF ) );
21419 DIP("out%c %s,$%d\n", nameISize(sz
), nameIRegRAX(sz
), (Int
)abyte
);
21421 case 0xE7: /* OUT eAX, imm8 */
21422 if (!(sz
== 2 || sz
== 4)) goto decode_failure
;
21423 t1
= newTemp(Ity_I64
);
21424 abyte
= getUChar(delta
); delta
++;
21425 assign( t1
, mkU64( abyte
& 0xFF ) );
21426 DIP("out%c %s,$%d\n", nameISize(sz
), nameIRegRAX(sz
), (Int
)abyte
);
21428 case 0xEE: /* OUT AL, %DX */
21430 t1
= newTemp(Ity_I64
);
21431 assign( t1
, unop(Iop_16Uto64
, getIRegRDX(2)) );
21432 DIP("out%c %s,%s\n", nameISize(sz
), nameIRegRAX(sz
),
21435 case 0xEF: /* OUT eAX, %DX */
21436 if (!(sz
== 2 || sz
== 4)) goto decode_failure
;
21437 t1
= newTemp(Ity_I64
);
21438 assign( t1
, unop(Iop_16Uto64
, getIRegRDX(2)) );
21439 DIP("out%c %s,%s\n", nameISize(sz
), nameIRegRAX(sz
),
21443 /* At this point, sz indicates the width, and t1 is a 64-bit
21444 value giving port number. */
21446 if (haveF2orF3(pfx
)) goto decode_failure
;
21447 vassert(sz
== 1 || sz
== 2 || sz
== 4);
21449 d
= unsafeIRDirty_0_N(
21451 "amd64g_dirtyhelper_OUT",
21452 &amd64g_dirtyhelper_OUT
,
21453 mkIRExprVec_3( mkexpr(t1
),
21454 widenUto64( getIRegRAX(sz
) ),
21457 stmt( IRStmt_Dirty(d
) );
21461 case 0xE8: /* CALL J4 */
21462 if (haveF3(pfx
)) goto decode_failure
;
21463 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
21464 d64
= getSDisp32(delta
); delta
+= 4;
21465 d64
+= (guest_RIP_bbstart
+delta
);
21466 /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */
21467 t1
= newTemp(Ity_I64
);
21468 assign(t1
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(8)));
21469 putIReg64(R_RSP
, mkexpr(t1
));
21470 storeLE( mkexpr(t1
), mkU64(guest_RIP_bbstart
+delta
));
21471 t2
= newTemp(Ity_I64
);
21472 assign(t2
, mkU64((Addr64
)d64
));
21473 make_redzone_AbiHint(vbi
, t1
, t2
/*nia*/, "call-d32");
21474 jmp_lit(dres
, Ijk_Call
, d64
);
21475 vassert(dres
->whatNext
== Dis_StopHere
);
21476 DIP("call 0x%llx\n", (ULong
)d64
);
21479 case 0xE9: /* Jv (jump, 16/32 offset) */
21480 if (haveF3(pfx
)) goto decode_failure
;
21481 sz
= 4; /* Prefixes that change operand size are ignored for this
21482 instruction. Operand size is forced to 32bit. */
21483 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
21484 d64
= (guest_RIP_bbstart
+delta
+sz
) + getSDisp(sz
,delta
);
21486 jmp_lit(dres
, Ijk_Boring
, d64
);
21487 vassert(dres
->whatNext
== Dis_StopHere
);
21488 DIP("jmp 0x%llx\n", (ULong
)d64
);
21491 case 0xEB: /* Jb (jump, byte offset) */
21492 if (haveF3(pfx
)) goto decode_failure
;
21493 /* Prefixes that change operand size are ignored for this instruction. */
21494 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
21495 d64
= (guest_RIP_bbstart
+delta
+1) + getSDisp8(delta
);
21497 jmp_lit(dres
, Ijk_Boring
, d64
);
21498 vassert(dres
->whatNext
== Dis_StopHere
);
21499 DIP("jmp-8 0x%llx\n", (ULong
)d64
);
   case 0xF5: /* CMC */
   case 0xF8: /* CLC */
   case 0xF9: /* STC */
      t1 = newTemp(Ity_I64);
      t2 = newTemp(Ity_I64);
      assign( t1, mk_amd64g_calculate_rflags_all() );
      switch (opc) {
         case 0xF5:
            assign( t2, binop(Iop_Xor64, mkexpr(t1),
                                         mkU64(AMD64G_CC_MASK_C)));
            DIP("cmc\n");
            break;
         case 0xF8:
            assign( t2, binop(Iop_And64, mkexpr(t1),
                                         mkU64(~AMD64G_CC_MASK_C)));
            DIP("clc\n");
            break;
         case 0xF9:
            assign( t2, binop(Iop_Or64, mkexpr(t1),
                                        mkU64(AMD64G_CC_MASK_C)));
            DIP("stc\n");
            break;
         default:
            vpanic("disInstr(x64)(cmc/clc/stc)");
      }
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t2) ));
      /* Set NDEP even though it isn't used.  This makes redundant-PUT
         elimination of previous stores to this field work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
      return delta;
   case 0xF6: { /* Grp3 Eb */
      Bool decode_OK = True;
      /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
      /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
      delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK );
      if (!decode_OK) goto decode_failure;
      return delta;
   }

   case 0xF7: { /* Grp3 Ev */
      Bool decode_OK = True;
      /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
      /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
      delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK );
      if (!decode_OK) goto decode_failure;
      return delta;
   }

   case 0xFC: /* CLD */
      if (haveF2orF3(pfx)) goto decode_failure;
      stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) );
      DIP("cld\n");
      return delta;

   case 0xFD: /* STD */
      if (haveF2orF3(pfx)) goto decode_failure;
      stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) );
      DIP("std\n");
      return delta;

   case 0xFE: { /* Grp4 Eb */
      Bool decode_OK = True;
      /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
      /* We now let dis_Grp4 itself decide if F2 and/or F3 are valid */
      delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK );
      if (!decode_OK) goto decode_failure;
      return delta;
   }

   case 0xFF: { /* Grp5 Ev */
      Bool decode_OK = True;
      /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
      /* We now let dis_Grp5 itself decide if F2 and/or F3 are valid */
      delta = dis_Grp5 ( vbi, pfx, sz, delta, dres, &decode_OK );
      if (!decode_OK) goto decode_failure;
      return delta;
   }

   default:
      break;

   }

  decode_failure:
   return deltaIN; /* fail */
}
/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level post-escape decoders: dis_ESC_0F           ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/
static IRTemp math_BSWAP ( IRTemp t1, IRType ty )
{
   IRTemp t2 = newTemp(ty);
21602 if (ty
== Ity_I64
) {
21603 IRTemp m8
= newTemp(Ity_I64
);
21604 IRTemp s8
= newTemp(Ity_I64
);
21605 IRTemp m16
= newTemp(Ity_I64
);
21606 IRTemp s16
= newTemp(Ity_I64
);
21607 IRTemp m32
= newTemp(Ity_I64
);
21608 assign( m8
, mkU64(0xFF00FF00FF00FF00ULL
) );
21612 binop(Iop_And64
,mkexpr(t1
),mkexpr(m8
)),
21615 binop(Iop_Shl64
,mkexpr(t1
),mkU8(8)),
21620 assign( m16
, mkU64(0xFFFF0000FFFF0000ULL
) );
21624 binop(Iop_And64
,mkexpr(s8
),mkexpr(m16
)),
21627 binop(Iop_Shl64
,mkexpr(s8
),mkU8(16)),
21632 assign( m32
, mkU64(0xFFFFFFFF00000000ULL
) );
21636 binop(Iop_And64
,mkexpr(s16
),mkexpr(m32
)),
21639 binop(Iop_Shl64
,mkexpr(s16
),mkU8(32)),
21645 if (ty
== Ity_I32
) {
21649 binop(Iop_Shl32
, mkexpr(t1
), mkU8(24)),
21652 binop(Iop_And32
, binop(Iop_Shl32
, mkexpr(t1
), mkU8(8)),
21653 mkU32(0x00FF0000)),
21655 binop(Iop_And32
, binop(Iop_Shr32
, mkexpr(t1
), mkU8(8)),
21656 mkU32(0x0000FF00)),
21657 binop(Iop_And32
, binop(Iop_Shr32
, mkexpr(t1
), mkU8(24)),
21658 mkU32(0x000000FF) )
21663 if (ty
== Ity_I16
) {
21666 binop(Iop_Shl16
, mkexpr(t1
), mkU8(8)),
21667 binop(Iop_Shr16
, mkexpr(t1
), mkU8(8)) ));
   return IRTemp_INVALID;
}
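/* For exposition only: the 64-bit case of math_BSWAP above is the
   classic mask-and-shift byte reversal.  In plain C it would read
   roughly

      x = ((x & 0xFF00FF00FF00FF00ULL) >> 8)
        | ((x << 8)  & 0xFF00FF00FF00FF00ULL);
      x = ((x & 0xFFFF0000FFFF0000ULL) >> 16)
        | ((x << 16) & 0xFFFF0000FFFF0000ULL);
      x = ((x & 0xFFFFFFFF00000000ULL) >> 32)
        | ((x << 32) & 0xFFFFFFFF00000000ULL);

   i.e. swap adjacent bytes, then adjacent 16-bit halves, then the two
   32-bit halves.  The 32- and 16-bit cases in the same function do the
   same job with fewer steps. */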
__attribute__((noinline))
static
Long dis_ESC_0F (
        /*MB_OUT*/DisResult* dres,
        /*MB_OUT*/Bool*      expect_CAS,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  vbi,
        Prefix pfx, Int sz, Long deltaIN
     )
{
   IRTemp addr = IRTemp_INVALID;
   IRTemp t1   = IRTemp_INVALID;
   IRTemp t2   = IRTemp_INVALID;

   /* In the first switch, look for ordinary integer insns. */
   Long  delta = deltaIN;
   UChar opc   = getUChar(delta);

   switch (opc) { /* first switch */
21703 modrm
= getUChar(delta
);
21704 /* 0F 01 /0 -- SGDT */
21705 /* 0F 01 /1 -- SIDT */
21706 if (!epartIsReg(modrm
)
21707 && (gregLO3ofRM(modrm
) == 0 || gregLO3ofRM(modrm
) == 1)) {
21708 /* This is really revolting, but ... since each processor
21709 (core) only has one IDT and one GDT, just let the guest
21710 see it (pass-through semantics). I can't see any way to
21711 construct a faked-up value, so don't bother to try. */
21712 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
21714 switch (gregLO3ofRM(modrm
)) {
21715 case 0: DIP("sgdt %s\n", dis_buf
); break;
21716 case 1: DIP("sidt %s\n", dis_buf
); break;
21717 default: vassert(0); /*NOTREACHED*/
21719 IRDirty
* d
= unsafeIRDirty_0_N (
21721 "amd64g_dirtyhelper_SxDT",
21722 &amd64g_dirtyhelper_SxDT
,
21723 mkIRExprVec_2( mkexpr(addr
),
21724 mkU64(gregLO3ofRM(modrm
)) )
21726 /* declare we're writing memory */
21727 d
->mFx
= Ifx_Write
;
21728 d
->mAddr
= mkexpr(addr
);
21730 stmt( IRStmt_Dirty(d
) );
21733 /* 0F 01 D0 = XGETBV */
21734 if (modrm
== 0xD0 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
21737 /* Fault (SEGV) if ECX isn't zero. Intel docs say #GP and I
21738 am not sure if that translates in to SEGV or to something
21739 else, in user space. */
21740 t1
= newTemp(Ity_I32
);
21741 assign( t1
, getIReg32(R_RCX
) );
21742 stmt( IRStmt_Exit(binop(Iop_CmpNE32
, mkexpr(t1
), mkU32(0)),
21744 IRConst_U64(guest_RIP_curr_instr
),
21747 putIRegRAX(4, mkU32(7));
21748 putIRegRDX(4, mkU32(0));
21751 /* BEGIN HACKY SUPPORT FOR xend */
21752 /* 0F 01 D5 = XEND */
21753 if (modrm
== 0xD5 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
      /* We are never in a transaction (xbegin immediately aborts).
21755 So this just always generates a General Protection Fault. */
21757 jmp_lit(dres
, Ijk_SigSEGV
, guest_RIP_bbstart
+ delta
);
21758 vassert(dres
->whatNext
== Dis_StopHere
);
21762 /* END HACKY SUPPORT FOR xend */
21763 /* BEGIN HACKY SUPPORT FOR xtest */
21764 /* 0F 01 D6 = XTEST */
21765 if (modrm
== 0xD6 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
21766 /* Sets ZF because there never is a transaction, and all
21767 CF, OF, SF, PF and AF are always cleared by xtest. */
21770 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
21771 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
21772 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkU64(AMD64G_CC_MASK_Z
) ));
21773 /* Set NDEP even though it isn't used. This makes redundant-PUT
21774 elimination of previous stores to this field work better. */
21775 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
21778 /* END HACKY SUPPORT FOR xtest */
21779 /* 0F 01 F9 = RDTSCP */
21780 if (modrm
== 0xF9 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_RDTSCP
)) {
21782 /* Uses dirty helper:
21783 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* )
21784 declared to wr rax, rcx, rdx
21786 const HChar
* fName
= "amd64g_dirtyhelper_RDTSCP";
21787 void* fAddr
= &amd64g_dirtyhelper_RDTSCP
;
21789 = unsafeIRDirty_0_N ( 0/*regparms*/,
21790 fName
, fAddr
, mkIRExprVec_1(IRExpr_GSPTR()) );
21791 /* declare guest state effects */
21793 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
21794 d
->fxState
[0].fx
= Ifx_Write
;
21795 d
->fxState
[0].offset
= OFFB_RAX
;
21796 d
->fxState
[0].size
= 8;
21797 d
->fxState
[1].fx
= Ifx_Write
;
21798 d
->fxState
[1].offset
= OFFB_RCX
;
21799 d
->fxState
[1].size
= 8;
21800 d
->fxState
[2].fx
= Ifx_Write
;
21801 d
->fxState
[2].offset
= OFFB_RDX
;
21802 d
->fxState
[2].size
= 8;
21803 /* execute the dirty call, side-effecting guest state */
21804 stmt( IRStmt_Dirty(d
) );
21805 /* RDTSCP is a serialising insn. So, just in case someone is
21806 using it as a memory fence ... */
21807 stmt( IRStmt_MBE(Imbe_Fence
) );
21811 /* else decode failed */
   case 0x05: /* SYSCALL */
      guest_RIP_next_mustcheck = True;
      guest_RIP_next_assumed = guest_RIP_bbstart + delta;
      putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) );
      /* It's important that all guest state is up-to-date
         at this point.  So we declare an end-of-block here, which
         forces any cached guest state to be flushed. */
      jmp_lit(dres, Ijk_Sys_syscall, guest_RIP_next_assumed);
      vassert(dres->whatNext == Dis_StopHere);
      DIP("syscall\n");
      return delta;

   case 0x0B: /* UD2 */
      stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
      jmp_lit(dres, Ijk_NoDecode, guest_RIP_curr_instr);
      vassert(dres->whatNext == Dis_StopHere);
      DIP("ud2\n");
      return delta;
21834 case 0x0D: /* 0F 0D /0 -- prefetch mem8 */
21835 /* 0F 0D /1 -- prefetchw mem8 */
21836 if (have66orF2orF3(pfx
)) goto decode_failure
;
21837 modrm
= getUChar(delta
);
21838 if (epartIsReg(modrm
)) goto decode_failure
;
21839 if (gregLO3ofRM(modrm
) != 0 && gregLO3ofRM(modrm
) != 1)
21840 goto decode_failure
;
21841 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
21843 switch (gregLO3ofRM(modrm
)) {
21844 case 0: DIP("prefetch %s\n", dis_buf
); break;
21845 case 1: DIP("prefetchw %s\n", dis_buf
); break;
21846 default: vassert(0); /*NOTREACHED*/
21855 // Intel CET instructions can have any prefixes before NOPs
21856 // and can use any ModRM, SIB and disp
21857 modrm
= getUChar(delta
);
21858 if (epartIsReg(modrm
)) {
21860 DIP("nop%c\n", nameISize(sz
));
21862 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
21864 DIP("nop%c %s\n", nameISize(sz
), dis_buf
);
21868 case 0x31: { /* RDTSC */
21869 IRTemp val
= newTemp(Ity_I64
);
21870 IRExpr
** args
= mkIRExprVec_0();
21871 IRDirty
* d
= unsafeIRDirty_1_N (
21874 "amd64g_dirtyhelper_RDTSC",
21875 &amd64g_dirtyhelper_RDTSC
,
21878 if (have66orF2orF3(pfx
)) goto decode_failure
;
21879 /* execute the dirty call, dumping the result in val. */
21880 stmt( IRStmt_Dirty(d
) );
21881 putIRegRDX(4, unop(Iop_64HIto32
, mkexpr(val
)));
21882 putIRegRAX(4, unop(Iop_64to32
, mkexpr(val
)));
21889 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
21890 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
21891 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
21892 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
21893 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
21894 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
21895 case 0x48: /* CMOVSb (cmov negative) */
21896 case 0x49: /* CMOVSb (cmov not negative) */
21897 case 0x4A: /* CMOVP (cmov parity even) */
21898 case 0x4B: /* CMOVNP (cmov parity odd) */
21899 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
21900 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
21901 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
21902 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
21903 if (haveF2orF3(pfx
)) goto decode_failure
;
21904 delta
= dis_cmov_E_G(vbi
, pfx
, sz
, (AMD64Condcode
)(opc
- 0x40), delta
);
21909 case 0x82: /* JBb/JNAEb (jump below) */
21910 case 0x83: /* JNBb/JAEb (jump not below) */
21911 case 0x84: /* JZb/JEb (jump zero) */
21912 case 0x85: /* JNZb/JNEb (jump not zero) */
21913 case 0x86: /* JBEb/JNAb (jump below or equal) */
21914 case 0x87: /* JNBEb/JAb (jump not below or equal) */
21915 case 0x88: /* JSb (jump negative) */
21916 case 0x89: /* JSb (jump not negative) */
21917 case 0x8A: /* JP (jump parity even) */
21918 case 0x8B: /* JNP/JPO (jump parity odd) */
21919 case 0x8C: /* JLb/JNGEb (jump less) */
21920 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
21921 case 0x8E: /* JLEb/JNGb (jump less or equal) */
21922 case 0x8F: { /* JGb/JNLEb (jump greater) */
21924 const HChar
* comment
= "";
21925 if (haveF3(pfx
)) goto decode_failure
;
21926 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
21927 jmpDelta
= getSDisp32(delta
);
21928 d64
= (guest_RIP_bbstart
+delta
+4) + jmpDelta
;
21930 /* End the block at this point. */
21931 jcc_01( dres
, (AMD64Condcode
)(opc
- 0x80),
21932 guest_RIP_bbstart
+delta
, d64
);
21933 vassert(dres
->whatNext
== Dis_StopHere
);
21934 DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc
- 0x80), (ULong
)d64
,
   case 0x92: /* set-Bb/set-NAEb (set if below) */
   case 0x93: /* set-NBb/set-AEb (set if not below) */
   case 0x94: /* set-Zb/set-Eb (set if zero) */
   case 0x95: /* set-NZb/set-NEb (set if not zero) */
   case 0x96: /* set-BEb/set-NAb (set if below or equal) */
   case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
   case 0x98: /* set-Sb (set if negative) */
   case 0x99: /* set-NSb (set if not negative) */
   case 0x9A: /* set-P (set if parity even) */
   case 0x9B: /* set-NP (set if parity odd) */
   case 0x9C: /* set-Lb/set-NGEb (set if less) */
   case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
   case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
   case 0x9F: /* set-Gb/set-NLEb (set if greater) */
      if (haveF2orF3(pfx)) goto decode_failure;
      t1 = newTemp(Ity_I8);
      assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) );
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         delta++;
         putIRegE(1, pfx, modrm, mkexpr(t1));
         DIP("set%s %s\n", name_AMD64Condcode(opc-0x90),
                           nameIRegE(1,pfx,modrm));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         storeLE( mkexpr(addr), mkexpr(t1) );
         DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf);
      }
      return delta;
21973 case 0x1B: { /* Future MPX instructions, currently NOPs.
21974 BNDMK b, m F3 0F 1B
21975 BNDCL b, r/m F3 0F 1A
21976 BNDCU b, r/m F2 0F 1A
21977 BNDCN b, r/m F2 0F 1B
21978 BNDMOV b, b/m 66 0F 1A
21979 BNDMOV b/m, b 66 0F 1B
21980 BNDLDX b, mib 0F 1A
21981 BNDSTX mib, b 0F 1B */
21983 /* All instructions have two operands. One operand is always the
21984 bnd register number (bnd0-bnd3, other register numbers are
21985 ignored when MPX isn't enabled, but should generate an
21986 exception if MPX is enabled) given by gregOfRexRM. The other
21987 operand is either a ModRM:reg, ModRM:r/m or a SIB encoded
21988 address, all of which can be decoded by using either
21989 eregOfRexRM or disAMode. */
21991 modrm
= getUChar(delta
);
21992 int bnd
= gregOfRexRM(pfx
,modrm
);
21994 if (epartIsReg(modrm
)) {
21995 oper
= nameIReg64 (eregOfRexRM(pfx
,modrm
));
21998 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
22003 if (haveF3no66noF2 (pfx
)) {
22005 DIP ("bndmk %s, %%bnd%d\n", oper
, bnd
);
22006 } else /* opc == 0x1A */ {
22007 DIP ("bndcl %s, %%bnd%d\n", oper
, bnd
);
22009 } else if (haveF2no66noF3 (pfx
)) {
22011 DIP ("bndcu %s, %%bnd%d\n", oper
, bnd
);
22012 } else /* opc == 0x1B */ {
22013 DIP ("bndcn %s, %%bnd%d\n", oper
, bnd
);
22015 } else if (have66noF2noF3 (pfx
)) {
22017 DIP ("bndmov %s, %%bnd%d\n", oper
, bnd
);
22018 } else /* opc == 0x1B */ {
22019 DIP ("bndmov %%bnd%d, %s\n", bnd
, oper
);
22021 } else if (haveNo66noF2noF3 (pfx
)) {
22023 DIP ("bndldx %s, %%bnd%d\n", oper
, bnd
);
22024 } else /* opc == 0x1B */ {
22025 DIP ("bndstx %%bnd%d, %s\n", bnd
, oper
);
22027 } else goto decode_failure
;
22032 case 0xA2: { /* CPUID */
22033 /* Uses dirty helper:
22034 void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* )
22035 declared to mod rax, wr rbx, rcx, rdx
22038 const HChar
* fName
= NULL
;
22039 void* fAddr
= NULL
;
22041 if (haveF2orF3(pfx
)) goto decode_failure
;
22043 /* This isn't entirely correct, CPUID should depend on the VEX
22044 capabilities, not on the underlying CPU. See bug #324882. */
22045 if ((archinfo
->hwcaps
& VEX_HWCAPS_AMD64_SSSE3
) &&
22046 (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_CX16
) &&
22047 (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX2
)) {
22048 fName
= "amd64g_dirtyhelper_CPUID_avx2";
22049 fAddr
= &amd64g_dirtyhelper_CPUID_avx2
;
22050 /* This is a Core-i7-4910-like machine */
22052 else if ((archinfo
->hwcaps
& VEX_HWCAPS_AMD64_SSSE3
) &&
22053 (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_CX16
) &&
22054 (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
22055 fName
= "amd64g_dirtyhelper_CPUID_avx_and_cx16";
22056 fAddr
= &amd64g_dirtyhelper_CPUID_avx_and_cx16
;
22057 /* This is a Core-i5-2300-like machine */
22059 else if ((archinfo
->hwcaps
& VEX_HWCAPS_AMD64_SSSE3
) &&
22060 (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_CX16
) &&
22061 (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_RDTSCP
)) {
22062 fName
= "amd64g_dirtyhelper_CPUID_sse42_and_cx16";
22063 fAddr
= &amd64g_dirtyhelper_CPUID_sse42_and_cx16
;
22065 else if ((archinfo
->hwcaps
& VEX_HWCAPS_AMD64_SSSE3
) &&
22066 (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_CX16
)) {
22067 fName
= "amd64g_dirtyhelper_CPUID_sse3_and_cx16";
22068 fAddr
= &amd64g_dirtyhelper_CPUID_sse3_and_cx16
;
22069 /* This is a Core-i5-670-like machine */
22072 /* Give a CPUID for at least a baseline machine, SSE2
22073 only, and no CX16 */
22074 fName
= "amd64g_dirtyhelper_CPUID_baseline";
22075 fAddr
= &amd64g_dirtyhelper_CPUID_baseline
;
22078 vassert(fName
); vassert(fAddr
);
22079 IRExpr
** args
= NULL
;
22080 if (fAddr
== &amd64g_dirtyhelper_CPUID_avx2
22081 || fAddr
== &amd64g_dirtyhelper_CPUID_avx_and_cx16
) {
22082 Bool hasF16C
= (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_F16C
) != 0;
22083 Bool hasRDRAND
= (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_RDRAND
) != 0;
22084 Bool hasRDSEED
= (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_RDSEED
) != 0;
22085 args
= mkIRExprVec_4(IRExpr_GSPTR(),
22086 mkIRExpr_HWord(hasF16C
? 1 : 0),
22087 mkIRExpr_HWord(hasRDRAND
? 1 : 0),
22088 mkIRExpr_HWord(hasRDSEED
? 1 : 0));
22090 args
= mkIRExprVec_1(IRExpr_GSPTR());
22092 d
= unsafeIRDirty_0_N ( 0/*regparms*/, fName
, fAddr
, args
);
22094 /* Declare guest state effects. EAX, EBX, ECX and EDX are written. EAX
22095 is also read, hence is marked as Modified. ECX is sometimes also
22096 read, depending on the value in EAX; that much is obvious from
22097 inspection of the helper function.
22099 This is a bit of a problem: if we mark ECX as Modified -- hence, by
22100 implication, Read -- then we may get false positives from Memcheck in
22101 the case where ECX contains undefined bits, but the EAX value is such
22102 that the instruction wouldn't read ECX anyway. The obvious way out
22103 of this is to mark it as written only, but that means Memcheck will
22104 effectively ignore undefinedness in the incoming ECX value. That
22105 seems like a small loss to take to avoid false positives here,
22106 though. Fundamentally the problem exists because CPUID itself has
22107 conditional dataflow -- whether ECX is read depends on the value in
22108 EAX -- but the annotation mechanism for dirty helpers can't represent
22109 that conditionality.
22111 A fully-accurate solution might be to change the helpers so that the
22112 EAX and ECX values are passed as parameters. Then, for the ECX
22113 value, we can pass, effectively "if EAX is some value for which ECX
22114 is ignored { 0 } else { ECX }", and Memcheck will see and understand
22115 this conditionality. */
22117 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
22118 d
->fxState
[0].fx
= Ifx_Modify
;
22119 d
->fxState
[0].offset
= OFFB_RAX
;
22120 d
->fxState
[0].size
= 8;
22121 d
->fxState
[1].fx
= Ifx_Write
;
22122 d
->fxState
[1].offset
= OFFB_RBX
;
22123 d
->fxState
[1].size
= 8;
22124 d
->fxState
[2].fx
= Ifx_Write
; /* was: Ifx_Modify; */
22125 d
->fxState
[2].offset
= OFFB_RCX
;
22126 d
->fxState
[2].size
= 8;
22127 d
->fxState
[3].fx
= Ifx_Write
;
22128 d
->fxState
[3].offset
= OFFB_RDX
;
22129 d
->fxState
[3].size
= 8;
22130 /* Execute the dirty call, side-effecting guest state. */
22131 stmt( IRStmt_Dirty(d
) );
22132 /* CPUID is a serialising insn. So, just in case someone is
22133 using it as a memory fence ... */
22134 stmt( IRStmt_MBE(Imbe_Fence
) );
22139 case 0xA3: { /* BT Gv,Ev */
22140 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22142 if (sz
!= 8 && sz
!= 4 && sz
!= 2) goto decode_failure
;
22143 delta
= dis_bt_G_E ( vbi
, pfx
, sz
, delta
, BtOpNone
, &ok
);
22144 if (!ok
) goto decode_failure
;
22148 case 0xA4: /* SHLDv imm8,Gv,Ev */
22149 modrm
= getUChar(delta
);
22150 d64
= delta
+ lengthAMode(pfx
, delta
);
22151 vex_sprintf(dis_buf
, "$%d", (Int
)getUChar(d64
));
22152 delta
= dis_SHLRD_Gv_Ev (
22153 vbi
, pfx
, delta
, modrm
, sz
,
22154 mkU8(getUChar(d64
)), True
, /* literal */
22155 dis_buf
, True
/* left */ );
22158 case 0xA5: /* SHLDv %cl,Gv,Ev */
22159 modrm
= getUChar(delta
);
22160 delta
= dis_SHLRD_Gv_Ev (
22161 vbi
, pfx
, delta
, modrm
, sz
,
22162 getIRegCL(), False
, /* not literal */
22163 "%cl", True
/* left */ );
22166 case 0xAB: { /* BTS Gv,Ev */
22167 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22169 if (sz
!= 8 && sz
!= 4 && sz
!= 2) goto decode_failure
;
22170 delta
= dis_bt_G_E ( vbi
, pfx
, sz
, delta
, BtOpSet
, &ok
);
22171 if (!ok
) goto decode_failure
;
22175 case 0xAC: /* SHRDv imm8,Gv,Ev */
22176 modrm
= getUChar(delta
);
22177 d64
= delta
+ lengthAMode(pfx
, delta
);
22178 vex_sprintf(dis_buf
, "$%d", (Int
)getUChar(d64
));
22179 delta
= dis_SHLRD_Gv_Ev (
22180 vbi
, pfx
, delta
, modrm
, sz
,
22181 mkU8(getUChar(d64
)), True
, /* literal */
22182 dis_buf
, False
/* right */ );
22185 case 0xAD: /* SHRDv %cl,Gv,Ev */
22186 modrm
= getUChar(delta
);
22187 delta
= dis_SHLRD_Gv_Ev (
22188 vbi
, pfx
, delta
, modrm
, sz
,
22189 getIRegCL(), False
, /* not literal */
22190 "%cl", False
/* right */);
22193 case 0xAF: /* IMUL Ev, Gv */
22194 if (haveF2orF3(pfx
)) goto decode_failure
;
22195 delta
= dis_mul_E_G ( vbi
, pfx
, sz
, delta
);
22198 case 0xB0: { /* CMPXCHG Gb,Eb */
22200 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
22201 delta
= dis_cmpxchg_G_E ( &ok
, vbi
, pfx
, 1, delta
);
22202 if (!ok
) goto decode_failure
;
22206 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */
22208 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
22209 if (sz
!= 2 && sz
!= 4 && sz
!= 8) goto decode_failure
;
22210 delta
= dis_cmpxchg_G_E ( &ok
, vbi
, pfx
, sz
, delta
);
22211 if (!ok
) goto decode_failure
;
22215 case 0xB3: { /* BTR Gv,Ev */
22216 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22218 if (sz
!= 8 && sz
!= 4 && sz
!= 2) goto decode_failure
;
22219 delta
= dis_bt_G_E ( vbi
, pfx
, sz
, delta
, BtOpReset
, &ok
);
22220 if (!ok
) goto decode_failure
;
22224 case 0xB6: /* MOVZXb Eb,Gv */
22225 if (haveF2orF3(pfx
)) goto decode_failure
;
22226 if (sz
!= 2 && sz
!= 4 && sz
!= 8)
22227 goto decode_failure
;
22228 delta
= dis_movx_E_G ( vbi
, pfx
, delta
, 1, sz
, False
);
22231 case 0xB7: /* MOVZXw Ew,Gv */
22232 if (haveF2orF3(pfx
)) goto decode_failure
;
22233 if (sz
!= 4 && sz
!= 8)
22234 goto decode_failure
;
22235 delta
= dis_movx_E_G ( vbi
, pfx
, delta
, 2, sz
, False
);
22238 case 0xBA: { /* Grp8 Ib,Ev */
22239 /* We let dis_Grp8_Imm decide whether F2 or F3 are allowable. */
22240 Bool decode_OK
= False
;
22241 modrm
= getUChar(delta
);
22242 am_sz
= lengthAMode(pfx
,delta
);
22243 d64
= getSDisp8(delta
+ am_sz
);
22244 delta
= dis_Grp8_Imm ( vbi
, pfx
, delta
, modrm
, am_sz
, sz
, d64
,
22247 goto decode_failure
;
22251 case 0xBB: { /* BTC Gv,Ev */
22252 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22254 if (sz
!= 8 && sz
!= 4 && sz
!= 2) goto decode_failure
;
22255 delta
= dis_bt_G_E ( vbi
, pfx
, sz
, delta
, BtOpComp
, &ok
);
22256 if (!ok
) goto decode_failure
;
22260 case 0xBC: /* BSF Gv,Ev */
22261 if (!haveF2orF3(pfx
)
22262 || (haveF3noF2(pfx
)
22263 && 0 == (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_BMI
))) {
22264 /* no-F2 no-F3 0F BC = BSF
22265 or F3 0F BC = REP; BSF on older CPUs. */
22266 delta
= dis_bs_E_G ( vbi
, pfx
, sz
, delta
, True
);
22269 /* Fall through, since F3 0F BC is TZCNT, and needs to
22270 be handled by dis_ESC_0F__SSE4. */
22273 case 0xBD: /* BSR Gv,Ev */
22274 if (!haveF2orF3(pfx
)
22275 || (haveF3noF2(pfx
)
22276 && 0 == (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_LZCNT
))) {
22277 /* no-F2 no-F3 0F BD = BSR
22278 or F3 0F BD = REP; BSR on older CPUs. */
22279 delta
= dis_bs_E_G ( vbi
, pfx
, sz
, delta
, False
);
22282 /* Fall through, since F3 0F BD is LZCNT, and needs to
22283 be handled by dis_ESC_0F__SSE4. */
22286 case 0xBE: /* MOVSXb Eb,Gv */
22287 if (haveF2orF3(pfx
)) goto decode_failure
;
22288 if (sz
!= 2 && sz
!= 4 && sz
!= 8)
22289 goto decode_failure
;
22290 delta
= dis_movx_E_G ( vbi
, pfx
, delta
, 1, sz
, True
);
22293 case 0xBF: /* MOVSXw Ew,Gv */
22294 if (haveF2orF3(pfx
)) goto decode_failure
;
22295 if (sz
!= 4 && sz
!= 8)
22296 goto decode_failure
;
22297 delta
= dis_movx_E_G ( vbi
, pfx
, delta
, 2, sz
, True
);
   case 0xC0: { /* XADD Gb,Eb */
      Bool decode_OK = False;
      delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta );
      if (!decode_OK)
         goto decode_failure;
      return delta;
   }

   case 0xC1: { /* XADD Gv,Ev */
      Bool decode_OK = False;
      delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta );
      if (!decode_OK)
         goto decode_failure;
      return delta;
   }
   case 0xC7: {
      modrm = getUChar(delta);

      // Detecting valid CMPXCHG combinations is pretty complex.
      Bool isValidCMPXCHG = gregLO3ofRM(modrm) == 1;
      if (isValidCMPXCHG) {
         if (have66(pfx)) isValidCMPXCHG = False;
         if (sz != 4 && sz != 8) isValidCMPXCHG = False;
         if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16))
            isValidCMPXCHG = False;
         if (epartIsReg(modrm)) isValidCMPXCHG = False;
         if (haveF2orF3(pfx)) {
            /* Since the e-part is memory only, F2 or F3 (one or the
               other) is acceptable if LOCK is also present.  But only
               for cmpxchg8b. */
            if (sz == 8) isValidCMPXCHG = False;
            if (haveF2andF3(pfx) || !haveLOCK(pfx)) isValidCMPXCHG = False;
         }
      }

      /* 0F C7 /1 (with qualifications) = CMPXCHG */
      if (isValidCMPXCHG) {
         // Note that we've already read the modrm byte by this point, but we
         // haven't moved delta past it.
         IRType  elemTy     = sz==4 ? Ity_I32 : Ity_I64;
         IRTemp  expdHi     = newTemp(elemTy);
         IRTemp  expdLo     = newTemp(elemTy);
         IRTemp  dataHi     = newTemp(elemTy);
         IRTemp  dataLo     = newTemp(elemTy);
         IRTemp  oldHi      = newTemp(elemTy);
         IRTemp  oldLo      = newTemp(elemTy);
         IRTemp  flags_old  = newTemp(Ity_I64);
         IRTemp  flags_new  = newTemp(Ity_I64);
         IRTemp  success    = newTemp(Ity_I1);
         IROp    opOR       = sz==4 ? Iop_Or32  : Iop_Or64;
         IROp    opXOR      = sz==4 ? Iop_Xor32 : Iop_Xor64;
         IROp    opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64;
         IRExpr* zero       = sz==4 ? mkU32(0) : mkU64(0);
         IRTemp  expdHi64   = newTemp(Ity_I64);
         IRTemp  expdLo64   = newTemp(Ity_I64);

         /* Translate this using a DCAS, even if there is no LOCK
            prefix.  Life is too short to bother with generating two
            different translations for the with/without-LOCK-prefix
            cases. */
         *expect_CAS = True;

         /* Generate address */
         vassert(!epartIsReg(modrm));
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;

         /* cmpxchg16b requires an alignment check. */
         if (sz == 8)
            gen_SIGNAL_if_not_16_aligned( vbi, addr );

         /* Get the expected and new values. */
         assign( expdHi64, getIReg64(R_RDX) );
         assign( expdLo64, getIReg64(R_RAX) );

         /* These are the correctly-sized expected and new values.
            However, we also get expdHi64/expdLo64 above as 64-bits
            regardless, because we will need them later in the 32-bit
            case (paradoxically). */
         assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64))
                               : mkexpr(expdHi64) );
         assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64))
                               : mkexpr(expdLo64) );
         assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) );
         assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) );

         /* Do the DCAS */
         stmt( IRStmt_CAS(
                  mkIRCAS( oldHi, oldLo,
                           Iend_LE, mkexpr(addr),
                           mkexpr(expdHi), mkexpr(expdLo),
                           mkexpr(dataHi), mkexpr(dataLo)
               )));

         /* success when oldHi:oldLo == expdHi:expdLo */
         assign( success,
                 binop(opCasCmpEQ,
                       binop(opOR,
                             binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)),
                             binop(opXOR, mkexpr(oldLo), mkexpr(expdLo))
                       ),
                       zero
                 ));

         /* If the DCAS is successful, that is to say oldHi:oldLo ==
            expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
            which is where they came from originally.  Both the actual
            contents of these two regs, and any shadow values, are
            unchanged.  If the DCAS fails then we're putting into
            RDX:RAX the value seen in memory. */
         /* Now of course there's a complication in the 32-bit case
            (bah!): if the DCAS succeeds, we need to leave RDX:RAX
            unchanged; but if we use the same scheme as in the 64-bit
            case, we get hit by the standard rule that a write to the
            bottom 32 bits of an integer register zeros the upper 32
            bits.  And so the upper halves of RDX and RAX mysteriously
            become zero.  So we have to stuff back in the original
            64-bit values which we previously stashed in
            expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
         /* It's just _so_ much fun ... */
         putIRegRDX( 8,
                     IRExpr_ITE( mkexpr(success),
                                 mkexpr(expdHi64),
                                 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi))
                                         : mkexpr(oldHi)
                   ));
         putIRegRAX( 8,
                     IRExpr_ITE( mkexpr(success),
                                 mkexpr(expdLo64),
                                 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldLo))
                                         : mkexpr(oldLo)
                   ));

         /* Copy the success bit into the Z flag and leave the others
            unchanged */
         assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all()));
         assign( flags_new,
                 binop(Iop_Or64,
                       binop(Iop_And64, mkexpr(flags_old),
                                        mkU64(~AMD64G_CC_MASK_Z)),
                       binop(Iop_Shl64,
                             binop(Iop_And64,
                                   unop(Iop_1Uto64, mkexpr(success)), mkU64(1)),
                             mkU8(AMD64G_CC_SHIFT_Z)) ));

         stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
         stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
         /* Set NDEP even though it isn't used.  This makes
            redundant-PUT elimination of previous stores to this field
            work. */
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

         /* Sheesh.  Aren't you glad it was me and not you that had to
            write and validate all this grunge? */

         DIP("cmpxchg8b %s\n", dis_buf);
         return delta;
      } // if (isValidCMPXCHG)
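
      /* To restate the flag update just above in plain C terms, the new
         rflags value is, in effect:

            new = (old & ~AMD64G_CC_MASK_Z)
                  | ((success ? 1 : 0) << AMD64G_CC_SHIFT_Z);

         i.e. only Z changes; it is then parked in CC_DEP1 with
         CC_OP_COPY so later flag reads reproduce it unchanged. */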
      /* 0F C7 /6 no-F2-or-F3 = RDRAND, 0F C7 /7 = RDSEED */
      int insn = gregLO3ofRM(modrm);
      if (((insn == 6 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDRAND))
           || (insn == 7 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDSEED)))
          && epartIsReg(modrm) && haveNoF2noF3(pfx)
          && (sz == 8 || sz == 4 || sz == 2)) {

         delta++; // move past modrm
         IRType ty = szToITy(sz);

         // Pull a first 32 bits of randomness, plus C flag, out of the host.
         IRTemp pairLO = newTemp(Ity_I64);
         IRDirty* dLO = NULL;
         if (insn == 6) /* RDRAND */
            dLO = unsafeIRDirty_1_N(pairLO, 0/*regparms*/,
                                    "amd64g_dirtyhelper_RDRAND",
                                    &amd64g_dirtyhelper_RDRAND, mkIRExprVec_0());
         else /* RDSEED */
            dLO = unsafeIRDirty_1_N(pairLO, 0/*regparms*/,
                                    "amd64g_dirtyhelper_RDSEED",
                                    &amd64g_dirtyhelper_RDSEED, mkIRExprVec_0());

         // There are no guest state or memory effects to declare for |dLO|.
         stmt( IRStmt_Dirty(dLO) );

         IRTemp randsLO = newTemp(Ity_I32);
         assign(randsLO, unop(Iop_64to32, mkexpr(pairLO)));
         IRTemp cLO = newTemp(Ity_I64);
         assign(cLO, binop(Iop_Shr64, mkexpr(pairLO), mkU8(32)));

         // We'll assemble the final pairing in (cFinal, randsNearlyFinal).
         IRTemp randsNearlyFinal = newTemp(Ity_I64);
         IRTemp cFinal           = newTemp(Ity_I64);

         if (ty == Ity_I64) {
            // Pull another 32 bits of randomness out of the host.
            IRTemp pairHI = newTemp(Ity_I64);
            IRDirty* dHI = NULL;
            if (insn == 6) /* RDRAND */
               dHI = unsafeIRDirty_1_N(pairHI, 0/*regparms*/,
                                       "amd64g_dirtyhelper_RDRAND",
                                       &amd64g_dirtyhelper_RDRAND, mkIRExprVec_0());
            else /* RDSEED */
               dHI = unsafeIRDirty_1_N(pairHI, 0/*regparms*/,
                                       "amd64g_dirtyhelper_RDSEED",
                                       &amd64g_dirtyhelper_RDSEED, mkIRExprVec_0());

            // There are no guest state or memory effects to declare for |dHI|.
            stmt( IRStmt_Dirty(dHI) );

            IRTemp randsHI = newTemp(Ity_I32);
            assign(randsHI, unop(Iop_64to32, mkexpr(pairHI)));
            IRTemp cHI = newTemp(Ity_I64);
            assign(cHI, binop(Iop_Shr64, mkexpr(pairHI), mkU8(32)));
            assign(randsNearlyFinal, binop(Iop_32HLto64,
                                           mkexpr(randsHI), mkexpr(randsLO)));
            assign(cFinal, binop(Iop_And64,
                                 binop(Iop_And64, mkexpr(cHI), mkexpr(cLO)),
                                 mkU64(1)));
         } else {
            assign(randsNearlyFinal, unop(Iop_32Uto64, mkexpr(randsLO)));
            assign(cFinal, binop(Iop_And64, mkexpr(cLO), mkU64(1)));
         }

         /* Now cFinal[0] is the final success/failure flag (cFinal[0] == 1
            means success).  But there's another twist.  If we failed then the
            returned value must be forced to zero.  Otherwise we could have the
            situation, when sz==8, where one of the host calls failed but the
            other didn't.  This would give cFinal[0] == 0 (correctly) but
            randsNearlyFinal not being zero, because it contains the 32 bit
            result of the non-failing call. */
         IRTemp randsFinal = newTemp(Ity_I64);
         assign(randsFinal,
                binop(Iop_And64,
                      mkexpr(randsNearlyFinal),
                      binop(Iop_Sar64,
                            binop(Iop_Shl64, mkexpr(cFinal), mkU8(63)),
                            mkU8(63))
                ));

         // So, finally, update the guest state.
         putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(randsFinal)));

         // Set C=<success indication>, O,S,Z,A,P = 0.  cFinal has already been
         // masked so only the lowest bit remains.
         stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
         stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(cFinal) ));
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

         if (insn == 6)
            DIP("rdrand %s", nameIRegE(sz, pfx, modrm));
         else
            DIP("rdseed %s", nameIRegE(sz, pfx, modrm));

         return delta;
      }

      goto decode_failure;
   }
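
   /* A note on the RDRAND/RDSEED masking above: cFinal is already 0 or 1,
      so

         (cFinal << 63) >>s 63

      (an arithmetic shift) yields either all-zeroes or all-ones, and
      ANDing that into randsNearlyFinal forces the returned value to zero
      whenever the carry/success bit is clear, as the architecture
      requires. */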
   case 0xC8: /* BSWAP %eax */
   case 0xC9:
   case 0xCA:
   case 0xCB:
   case 0xCC:
   case 0xCD:
   case 0xCE:
   case 0xCF: /* BSWAP %edi */
      if (haveF2orF3(pfx)) goto decode_failure;
      /* According to the AMD64 docs, this insn can have size 4 or
         8. */
      if (sz == 4) {
         t1 = newTemp(Ity_I32);
         assign( t1, getIRegRexB(4, pfx, opc-0xC8) );
         t2 = math_BSWAP( t1, Ity_I32 );
         putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2));
         DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8));
         return delta;
      }
      if (sz == 8) {
         t1 = newTemp(Ity_I64);
         t2 = newTemp(Ity_I64);
         assign( t1, getIRegRexB(8, pfx, opc-0xC8) );
         t2 = math_BSWAP( t1, Ity_I64 );
         putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2));
         DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8));
         return delta;
      }
      goto decode_failure;
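
      /* math_BSWAP simply reverses byte order; for example bswapl
         applied to 0x11223344 yields 0x44332211, and bswapq reverses
         all eight bytes of the 64-bit register. */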

   default:
      break;

   } /* first switch */
   /* =-=-=-=-=-=-=-=-= MMXery =-=-=-=-=-=-=-=-= */
   /* In the second switch, pick off MMX insns. */

   if (!have66orF2orF3(pfx)) {
      /* So there's no SIMD prefix. */

      vassert(sz == 4 || sz == 8);

      switch (opc) { /* second switch */

      case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */

      case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
      case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
      case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
      case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */

      case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */

      case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
      {
         Bool decode_OK = False;
         delta = dis_MMX ( &decode_OK, vbi, pfx, sz, deltaIN );
         if (decode_OK)
            return delta;
         goto decode_failure;
      }

      default:
         break;
      } /* second switch */
   }

   /* A couple of MMX corner cases */
   if (opc == 0x0E/* FEMMS */ || opc == 0x77/* EMMS */) {
      if (sz != 4)
         goto decode_failure;
      do_EMMS_preamble();
      DIP("{f}emms\n");
      return delta;
   }
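
   /* (F)EMMS marks the x87 register stack as empty again after MMX use;
      do_EMMS_preamble models that, broadly, by resetting the guest FP
      tag state so that subsequent x87 code sees an empty stack. */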
   /* =-=-=-=-=-=-=-=-= SSE2ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's an SSE or SSE2 instruction.  We can try this
      without checking the guest hwcaps because SSE2 is a baseline
      facility in 64 bit mode. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F__SSE2 ( &decode_OK,
                                 archinfo, vbi, pfx, sz, deltaIN, dres );
      if (decode_OK)
         return delta;
   }

   /* =-=-=-=-=-=-=-=-= SSE3ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's a SSE3 instruction.  FIXME: check guest hwcaps
      before doing this. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F__SSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

   /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's a SSE4 instruction.  FIXME: check guest hwcaps
      before doing this. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F__SSE4 ( &decode_OK,
                                 archinfo, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

  decode_failure:
   return deltaIN; /* fail */
}
/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level post-escape decoders: dis_ESC_0F38         ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

__attribute__((noinline))
static
Long dis_ESC_0F38 (
        /*MB_OUT*/DisResult* dres,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  vbi,
        Prefix pfx, Int sz, Long deltaIN
     )
{
   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
   switch (opc) {

   case 0xF0:   /* 0F 38 F0 = MOVBE m16/32/64(E), r16/32/64(G) */
   case 0xF1: { /* 0F 38 F1 = MOVBE r16/32/64(G), m16/32/64(E) */
      if (!haveF2orF3(pfx) && !haveVEX(pfx)
          && (sz == 2 || sz == 4 || sz == 8)) {
         IRTemp addr  = IRTemp_INVALID;
         UChar  modrm = 0;
         Int    alen  = 0;
         HChar  dis_buf[50];
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) break;
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         IRType ty  = szToITy(sz);
         IRTemp src = newTemp(ty);
         if (opc == 0xF0) { /* LOAD */
            assign(src, loadLE(ty, mkexpr(addr)));
            IRTemp dst = math_BSWAP(src, ty);
            putIRegG(sz, pfx, modrm, mkexpr(dst));
            DIP("movbe %s,%s\n", dis_buf, nameIRegG(sz, pfx, modrm));
         } else { /* STORE */
            assign(src, getIRegG(sz, pfx, modrm));
            IRTemp dst = math_BSWAP(src, ty);
            storeLE(mkexpr(addr), mkexpr(dst));
            DIP("movbe %s,%s\n", nameIRegG(sz, pfx, modrm), dis_buf);
         }
         return delta;
      }
      /* else fall through; maybe one of the decoders below knows what
         it is. */
      break;
   }

   default:
      break;
   }
   /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's an SSSE3 instruction.  FIXME: consult guest hwcaps
      rather than proceeding indiscriminately. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F38__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

   /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's an SSE4 instruction.  FIXME: consult guest hwcaps
      rather than proceeding indiscriminately. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F38__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

   /* Ignore previous decode attempts and restart from the beginning of
      the instruction. */
   delta = deltaIN;
   opc   = getUChar(delta);
   delta++;

   switch (opc) {
   case 0xF6: {
      /* 66 0F 38 F6 = ADCX r32/64(G), m32/64(E) */
      /* F3 0F 38 F6 = ADOX r32/64(G), m32/64(E) */
      /* These were introduced in Broadwell.  Gate them on AVX so as to at
         least reject them on earlier guests.  Has no host requirements. */
      if (have66noF2noF3(pfx) && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
         if (sz == 2)
            sz = 4; /* 66 prefix but operand size is 4/8 */
         delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagCarryX, True,
                               sz, delta, "adcx" );
         return delta;
      }
      if (haveF3no66noF2(pfx) && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
         delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagOverX, True,
                               sz, delta, "adox" );
         return delta;
      }
      /* else fall through */
      break;
   }
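
   /* Background for the case above: ADCX and ADOX are both
      add-with-carry, but ADCX consumes and writes only CF while ADOX
      consumes and writes only OF, which lets two carry chains be
      interleaved in large-integer code.  Hence the WithFlagCarryX /
      WithFlagOverX distinction passed to dis_op2_E_G. */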
   default:
      break;
   }

  /*decode_failure:*/
   return deltaIN; /* fail */
}
22854 /*------------------------------------------------------------*/
22856 /*--- Top-level post-escape decoders: dis_ESC_0F3A ---*/
22858 /*------------------------------------------------------------*/
22860 __attribute__((noinline
))
22862 Long
dis_ESC_0F3A (
22863 /*MB_OUT*/DisResult
* dres
,
22864 const VexArchInfo
* archinfo
,
22865 const VexAbiInfo
* vbi
,
22866 Prefix pfx
, Int sz
, Long deltaIN
22869 Long delta
= deltaIN
;
22870 UChar opc
= getUChar(delta
);
22879 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22880 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22881 rather than proceeding indiscriminately. */
22883 Bool decode_OK
= False
;
22884 delta
= dis_ESC_0F3A__SupSSE3 ( &decode_OK
, vbi
, pfx
, sz
, deltaIN
);
22889 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22890 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22891 rather than proceeding indiscriminately. */
22893 Bool decode_OK
= False
;
22894 delta
= dis_ESC_0F3A__SSE4 ( &decode_OK
, vbi
, pfx
, sz
, deltaIN
);
22899 return deltaIN
; /* fail */
22903 /*------------------------------------------------------------*/
22905 /*--- Top-level post-escape decoders: dis_ESC_0F__VEX ---*/
22907 /*------------------------------------------------------------*/
22909 /* FIXME: common up with the _256_ version below? */
22911 Long
dis_VEX_NDS_128_AnySimdPfx_0F_WIG (
22912 /*OUT*/Bool
* uses_vvvv
, const VexAbiInfo
* vbi
,
22913 Prefix pfx
, Long delta
, const HChar
* name
,
22914 /* The actual operation. Use either 'op' or 'opfn',
22916 IROp op
, IRTemp(*opFn
)(IRTemp
,IRTemp
),
22917 Bool invertLeftArg
,
22921 UChar modrm
= getUChar(delta
);
22922 UInt rD
= gregOfRexRM(pfx
, modrm
);
22923 UInt rSL
= getVexNvvvv(pfx
);
22924 IRTemp tSL
= newTemp(Ity_V128
);
22925 IRTemp tSR
= newTemp(Ity_V128
);
22926 IRTemp addr
= IRTemp_INVALID
;
22929 vassert(0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*WIG?*/);
22931 assign(tSL
, invertLeftArg
? unop(Iop_NotV128
, getXMMReg(rSL
))
22934 if (epartIsReg(modrm
)) {
22935 UInt rSR
= eregOfRexRM(pfx
, modrm
);
22937 assign(tSR
, getXMMReg(rSR
));
22938 DIP("%s %s,%s,%s\n",
22939 name
, nameXMMReg(rSR
), nameXMMReg(rSL
), nameXMMReg(rD
));
22941 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
22943 assign(tSR
, loadLE(Ity_V128
, mkexpr(addr
)));
22944 DIP("%s %s,%s,%s\n",
22945 name
, dis_buf
, nameXMMReg(rSL
), nameXMMReg(rD
));
22948 IRTemp res
= IRTemp_INVALID
;
22949 if (op
!= Iop_INVALID
) {
22950 vassert(opFn
== NULL
);
22951 res
= newTemp(Ity_V128
);
22952 if (requiresRMode(op
)) {
22953 IRTemp rm
= newTemp(Ity_I32
);
22954 assign(rm
, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
22955 assign(res
, swapArgs
22956 ? triop(op
, mkexpr(rm
), mkexpr(tSR
), mkexpr(tSL
))
22957 : triop(op
, mkexpr(rm
), mkexpr(tSL
), mkexpr(tSR
)));
22959 assign(res
, swapArgs
22960 ? binop(op
, mkexpr(tSR
), mkexpr(tSL
))
22961 : binop(op
, mkexpr(tSL
), mkexpr(tSR
)));
22964 vassert(opFn
!= NULL
);
22965 res
= swapArgs
? opFn(tSR
, tSL
) : opFn(tSL
, tSR
);
22968 putYMMRegLoAndZU(rD
, mkexpr(res
));
22975 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, with a simple IROp
22976 for the operation, no inversion of the left arg, and no swapping of
22979 Long
dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple (
22980 /*OUT*/Bool
* uses_vvvv
, const VexAbiInfo
* vbi
,
22981 Prefix pfx
, Long delta
, const HChar
* name
,
22985 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22986 uses_vvvv
, vbi
, pfx
, delta
, name
, op
, NULL
, False
, False
);
22990 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, using the given IR
22991 generator to compute the result, no inversion of the left
22992 arg, and no swapping of args. */
22994 Long
dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex (
22995 /*OUT*/Bool
* uses_vvvv
, const VexAbiInfo
* vbi
,
22996 Prefix pfx
, Long delta
, const HChar
* name
,
22997 IRTemp(*opFn
)(IRTemp
,IRTemp
)
23000 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
23001 uses_vvvv
, vbi
, pfx
, delta
, name
,
23002 Iop_INVALID
, opFn
, False
, False
);
23006 /* Vector by scalar shift of V by the amount specified at the bottom
23008 static ULong
dis_AVX128_shiftV_byE ( const VexAbiInfo
* vbi
,
23009 Prefix pfx
, Long delta
,
23010 const HChar
* opname
, IROp op
)
23015 Bool shl
, shr
, sar
;
23016 UChar modrm
= getUChar(delta
);
23017 UInt rG
= gregOfRexRM(pfx
,modrm
);
23018 UInt rV
= getVexNvvvv(pfx
);;
23019 IRTemp g0
= newTemp(Ity_V128
);
23020 IRTemp g1
= newTemp(Ity_V128
);
23021 IRTemp amt
= newTemp(Ity_I64
);
23022 IRTemp amt8
= newTemp(Ity_I8
);
23023 if (epartIsReg(modrm
)) {
23024 UInt rE
= eregOfRexRM(pfx
,modrm
);
23025 assign( amt
, getXMMRegLane64(rE
, 0) );
23026 DIP("%s %s,%s,%s\n", opname
, nameXMMReg(rE
),
23027 nameXMMReg(rV
), nameXMMReg(rG
) );
23030 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23031 assign( amt
, loadLE(Ity_I64
, mkexpr(addr
)) );
23032 DIP("%s %s,%s,%s\n", opname
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
23035 assign( g0
, getXMMReg(rV
) );
23036 assign( amt8
, unop(Iop_64to8
, mkexpr(amt
)) );
23038 shl
= shr
= sar
= False
;
23041 case Iop_ShlN16x8
: shl
= True
; size
= 32; break;
23042 case Iop_ShlN32x4
: shl
= True
; size
= 32; break;
23043 case Iop_ShlN64x2
: shl
= True
; size
= 64; break;
23044 case Iop_SarN16x8
: sar
= True
; size
= 16; break;
23045 case Iop_SarN32x4
: sar
= True
; size
= 32; break;
23046 case Iop_ShrN16x8
: shr
= True
; size
= 16; break;
23047 case Iop_ShrN32x4
: shr
= True
; size
= 32; break;
23048 case Iop_ShrN64x2
: shr
= True
; size
= 64; break;
23049 default: vassert(0);
23056 binop(Iop_CmpLT64U
, mkexpr(amt
), mkU64(size
)),
23057 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
23066 binop(Iop_CmpLT64U
, mkexpr(amt
), mkU64(size
)),
23067 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
23068 binop(op
, mkexpr(g0
), mkU8(size
-1))
23075 putYMMRegLoAndZU( rG
, mkexpr(g1
) );
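
/* Semantics note for the shift-by-scalar helpers here: for PSLL/PSRL a
   shift count >= the lane width must produce zero, and for PSRA it must
   behave like a shift by (width-1), i.e. replicate the sign bit.  That
   is what the CmpLT64U guard above implements: in-range counts use the
   real shift, out-of-range counts select zero (or the width-1 arithmetic
   shift in the PSRA case). */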
23080 /* Vector by scalar shift of V by the amount specified at the bottom
23082 static ULong
dis_AVX256_shiftV_byE ( const VexAbiInfo
* vbi
,
23083 Prefix pfx
, Long delta
,
23084 const HChar
* opname
, IROp op
)
23089 Bool shl
, shr
, sar
;
23090 UChar modrm
= getUChar(delta
);
23091 UInt rG
= gregOfRexRM(pfx
,modrm
);
23092 UInt rV
= getVexNvvvv(pfx
);;
23093 IRTemp g0
= newTemp(Ity_V256
);
23094 IRTemp g1
= newTemp(Ity_V256
);
23095 IRTemp amt
= newTemp(Ity_I64
);
23096 IRTemp amt8
= newTemp(Ity_I8
);
23097 if (epartIsReg(modrm
)) {
23098 UInt rE
= eregOfRexRM(pfx
,modrm
);
23099 assign( amt
, getXMMRegLane64(rE
, 0) );
23100 DIP("%s %s,%s,%s\n", opname
, nameXMMReg(rE
),
23101 nameYMMReg(rV
), nameYMMReg(rG
) );
23104 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23105 assign( amt
, loadLE(Ity_I64
, mkexpr(addr
)) );
23106 DIP("%s %s,%s,%s\n", opname
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
) );
23109 assign( g0
, getYMMReg(rV
) );
23110 assign( amt8
, unop(Iop_64to8
, mkexpr(amt
)) );
23112 shl
= shr
= sar
= False
;
23115 case Iop_ShlN16x16
: shl
= True
; size
= 32; break;
23116 case Iop_ShlN32x8
: shl
= True
; size
= 32; break;
23117 case Iop_ShlN64x4
: shl
= True
; size
= 64; break;
23118 case Iop_SarN16x16
: sar
= True
; size
= 16; break;
23119 case Iop_SarN32x8
: sar
= True
; size
= 32; break;
23120 case Iop_ShrN16x16
: shr
= True
; size
= 16; break;
23121 case Iop_ShrN32x8
: shr
= True
; size
= 32; break;
23122 case Iop_ShrN64x4
: shr
= True
; size
= 64; break;
23123 default: vassert(0);
23130 binop(Iop_CmpLT64U
, mkexpr(amt
), mkU64(size
)),
23131 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
23132 binop(Iop_V128HLtoV256
, mkV128(0), mkV128(0))
23140 binop(Iop_CmpLT64U
, mkexpr(amt
), mkU64(size
)),
23141 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
23142 binop(op
, mkexpr(g0
), mkU8(size
-1))
23149 putYMMReg( rG
, mkexpr(g1
) );
23154 /* Vector by vector shift of V by the amount specified at the bottom
23155 of E. Vector by vector shifts are defined for all shift amounts,
23156 so not using Iop_S*x* here (and SSE2 doesn't support variable shifts
23158 static ULong
dis_AVX_var_shiftV_byE ( const VexAbiInfo
* vbi
,
23159 Prefix pfx
, Long delta
,
23160 const HChar
* opname
, IROp op
, Bool isYMM
)
23165 UChar modrm
= getUChar(delta
);
23166 UInt rG
= gregOfRexRM(pfx
,modrm
);
23167 UInt rV
= getVexNvvvv(pfx
);;
23168 IRTemp sV
= isYMM
? newTemp(Ity_V256
) : newTemp(Ity_V128
);
23169 IRTemp amt
= isYMM
? newTemp(Ity_V256
) : newTemp(Ity_V128
);
23170 IRTemp amts
[8], sVs
[8], res
[8];
23171 if (epartIsReg(modrm
)) {
23172 UInt rE
= eregOfRexRM(pfx
,modrm
);
23173 assign( amt
, isYMM
? getYMMReg(rE
) : getXMMReg(rE
) );
23175 DIP("%s %s,%s,%s\n", opname
, nameYMMReg(rE
),
23176 nameYMMReg(rV
), nameYMMReg(rG
) );
23178 DIP("%s %s,%s,%s\n", opname
, nameXMMReg(rE
),
23179 nameXMMReg(rV
), nameXMMReg(rG
) );
23183 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23184 assign( amt
, loadLE(isYMM
? Ity_V256
: Ity_V128
, mkexpr(addr
)) );
23186 DIP("%s %s,%s,%s\n", opname
, dis_buf
, nameYMMReg(rV
),
23189 DIP("%s %s,%s,%s\n", opname
, dis_buf
, nameXMMReg(rV
),
23194 assign( sV
, isYMM
? getYMMReg(rV
) : getXMMReg(rV
) );
23198 case Iop_Shl32
: size
= 32; break;
23199 case Iop_Shl64
: size
= 64; break;
23200 case Iop_Sar32
: size
= 32; break;
23201 case Iop_Shr32
: size
= 32; break;
23202 case Iop_Shr64
: size
= 64; break;
23203 default: vassert(0);
23206 for (i
= 0; i
< 8; i
++) {
23207 sVs
[i
] = IRTemp_INVALID
;
23208 amts
[i
] = IRTemp_INVALID
;
23213 breakupV256to32s( sV
, &sVs
[7], &sVs
[6], &sVs
[5], &sVs
[4],
23214 &sVs
[3], &sVs
[2], &sVs
[1], &sVs
[0] );
23215 breakupV256to32s( amt
, &amts
[7], &amts
[6], &amts
[5], &amts
[4],
23216 &amts
[3], &amts
[2], &amts
[1], &amts
[0] );
23218 breakupV128to32s( sV
, &sVs
[3], &sVs
[2], &sVs
[1], &sVs
[0] );
23219 breakupV128to32s( amt
, &amts
[3], &amts
[2], &amts
[1], &amts
[0] );
23224 breakupV256to64s( sV
, &sVs
[3], &sVs
[2], &sVs
[1], &sVs
[0] );
23225 breakupV256to64s( amt
, &amts
[3], &amts
[2], &amts
[1], &amts
[0] );
23227 breakupV128to64s( sV
, &sVs
[1], &sVs
[0] );
23228 breakupV128to64s( amt
, &amts
[1], &amts
[0] );
23231 default: vassert(0);
23233 for (i
= 0; i
< 8; i
++)
23234 if (sVs
[i
] != IRTemp_INVALID
) {
23235 res
[i
] = size
== 32 ? newTemp(Ity_I32
) : newTemp(Ity_I64
);
23238 binop(size
== 32 ? Iop_CmpLT32U
: Iop_CmpLT64U
,
23240 size
== 32 ? mkU32(size
) : mkU64(size
)),
23241 binop(op
, mkexpr(sVs
[i
]),
23242 unop(size
== 32 ? Iop_32to8
: Iop_64to8
,
23244 op
== Iop_Sar32
? binop(op
, mkexpr(sVs
[i
]), mkU8(size
-1))
23245 : size
== 32 ? mkU32(0) : mkU64(0)
23248 res
[i
] = IRTemp_INVALID
;
23252 for (i
= 0; i
< 8; i
++)
23253 putYMMRegLane32( rG
, i
, (i
< 4 || isYMM
)
23254 ? mkexpr(res
[i
]) : mkU32(0) );
23257 for (i
= 0; i
< 4; i
++)
23258 putYMMRegLane64( rG
, i
, (i
< 2 || isYMM
)
23259 ? mkexpr(res
[i
]) : mkU64(0) );
23261 default: vassert(0);
23268 /* Vector by scalar shift of E into V, by an immediate byte. Modified
23269 version of dis_SSE_shiftE_imm. */
23271 Long
dis_AVX128_shiftE_to_V_imm( Prefix pfx
,
23272 Long delta
, const HChar
* opname
, IROp op
)
23274 Bool shl
, shr
, sar
;
23275 UChar rm
= getUChar(delta
);
23276 IRTemp e0
= newTemp(Ity_V128
);
23277 IRTemp e1
= newTemp(Ity_V128
);
23278 UInt rD
= getVexNvvvv(pfx
);
23280 vassert(epartIsReg(rm
));
23281 vassert(gregLO3ofRM(rm
) == 2
23282 || gregLO3ofRM(rm
) == 4 || gregLO3ofRM(rm
) == 6);
23283 amt
= getUChar(delta
+1);
23285 DIP("%s $%d,%s,%s\n", opname
,
23287 nameXMMReg(eregOfRexRM(pfx
,rm
)),
23289 assign( e0
, getXMMReg(eregOfRexRM(pfx
,rm
)) );
23291 shl
= shr
= sar
= False
;
23294 case Iop_ShlN16x8
: shl
= True
; size
= 16; break;
23295 case Iop_ShlN32x4
: shl
= True
; size
= 32; break;
23296 case Iop_ShlN64x2
: shl
= True
; size
= 64; break;
23297 case Iop_SarN16x8
: sar
= True
; size
= 16; break;
23298 case Iop_SarN32x4
: sar
= True
; size
= 32; break;
23299 case Iop_ShrN16x8
: shr
= True
; size
= 16; break;
23300 case Iop_ShrN32x4
: shr
= True
; size
= 32; break;
23301 case Iop_ShrN64x2
: shr
= True
; size
= 64; break;
23302 default: vassert(0);
23306 assign( e1
, amt
>= size
23308 : binop(op
, mkexpr(e0
), mkU8(amt
))
23312 assign( e1
, amt
>= size
23313 ? binop(op
, mkexpr(e0
), mkU8(size
-1))
23314 : binop(op
, mkexpr(e0
), mkU8(amt
))
23320 putYMMRegLoAndZU( rD
, mkexpr(e1
) );
23325 /* Vector by scalar shift of E into V, by an immediate byte. Modified
23326 version of dis_AVX128_shiftE_to_V_imm. */
23328 Long
dis_AVX256_shiftE_to_V_imm( Prefix pfx
,
23329 Long delta
, const HChar
* opname
, IROp op
)
23331 Bool shl
, shr
, sar
;
23332 UChar rm
= getUChar(delta
);
23333 IRTemp e0
= newTemp(Ity_V256
);
23334 IRTemp e1
= newTemp(Ity_V256
);
23335 UInt rD
= getVexNvvvv(pfx
);
23337 vassert(epartIsReg(rm
));
23338 vassert(gregLO3ofRM(rm
) == 2
23339 || gregLO3ofRM(rm
) == 4 || gregLO3ofRM(rm
) == 6);
23340 amt
= getUChar(delta
+1);
23342 DIP("%s $%d,%s,%s\n", opname
,
23344 nameYMMReg(eregOfRexRM(pfx
,rm
)),
23346 assign( e0
, getYMMReg(eregOfRexRM(pfx
,rm
)) );
23348 shl
= shr
= sar
= False
;
23351 case Iop_ShlN16x16
: shl
= True
; size
= 16; break;
23352 case Iop_ShlN32x8
: shl
= True
; size
= 32; break;
23353 case Iop_ShlN64x4
: shl
= True
; size
= 64; break;
23354 case Iop_SarN16x16
: sar
= True
; size
= 16; break;
23355 case Iop_SarN32x8
: sar
= True
; size
= 32; break;
23356 case Iop_ShrN16x16
: shr
= True
; size
= 16; break;
23357 case Iop_ShrN32x8
: shr
= True
; size
= 32; break;
23358 case Iop_ShrN64x4
: shr
= True
; size
= 64; break;
23359 default: vassert(0);
23364 assign( e1
, amt
>= size
23365 ? binop(Iop_V128HLtoV256
, mkV128(0), mkV128(0))
23366 : binop(op
, mkexpr(e0
), mkU8(amt
))
23370 assign( e1
, amt
>= size
23371 ? binop(op
, mkexpr(e0
), mkU8(size
-1))
23372 : binop(op
, mkexpr(e0
), mkU8(amt
))
23378 putYMMReg( rD
, mkexpr(e1
) );
23383 /* Lower 64-bit lane only AVX128 binary operation:
23384 G[63:0] = V[63:0] `op` E[63:0]
23385 G[127:64] = V[127:64]
23387 The specified op must be of the 64F0x2 kind, so that it
23388 copies the upper half of the left operand to the result.
23390 static Long
dis_AVX128_E_V_to_G_lo64 ( /*OUT*/Bool
* uses_vvvv
,
23391 const VexAbiInfo
* vbi
,
23392 Prefix pfx
, Long delta
,
23393 const HChar
* opname
, IROp op
)
23398 UChar rm
= getUChar(delta
);
23399 UInt rG
= gregOfRexRM(pfx
,rm
);
23400 UInt rV
= getVexNvvvv(pfx
);
23401 IRExpr
* vpart
= getXMMReg(rV
);
23402 if (epartIsReg(rm
)) {
23403 UInt rE
= eregOfRexRM(pfx
,rm
);
23404 putXMMReg( rG
, binop(op
, vpart
, getXMMReg(rE
)) );
23405 DIP("%s %s,%s,%s\n", opname
,
23406 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
23409 /* We can only do a 64-bit memory read, so the upper half of the
23410 E operand needs to be made simply of zeroes. */
23411 IRTemp epart
= newTemp(Ity_V128
);
23412 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23413 assign( epart
, unop( Iop_64UtoV128
,
23414 loadLE(Ity_I64
, mkexpr(addr
))) );
23415 putXMMReg( rG
, binop(op
, vpart
, mkexpr(epart
)) );
23416 DIP("%s %s,%s,%s\n", opname
,
23417 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
23418 delta
= delta
+alen
;
23420 putYMMRegLane128( rG
, 1, mkV128(0) );
23426 /* Lower 64-bit lane only AVX128 unary operation:
23427 G[63:0] = op(E[63:0])
23428 G[127:64] = V[127:64]
23430 The specified op must be of the 64F0x2 kind, so that it
23431 copies the upper half of the operand to the result.
23433 static Long
dis_AVX128_E_V_to_G_lo64_unary ( /*OUT*/Bool
* uses_vvvv
,
23434 const VexAbiInfo
* vbi
,
23435 Prefix pfx
, Long delta
,
23436 const HChar
* opname
, IROp op
)
23441 UChar rm
= getUChar(delta
);
23442 UInt rG
= gregOfRexRM(pfx
,rm
);
23443 UInt rV
= getVexNvvvv(pfx
);
23444 IRTemp e64
= newTemp(Ity_I64
);
23446 /* Fetch E[63:0] */
23447 if (epartIsReg(rm
)) {
23448 UInt rE
= eregOfRexRM(pfx
,rm
);
23449 assign(e64
, getXMMRegLane64(rE
, 0));
23450 DIP("%s %s,%s,%s\n", opname
,
23451 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
23454 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23455 assign(e64
, loadLE(Ity_I64
, mkexpr(addr
)));
23456 DIP("%s %s,%s,%s\n", opname
,
23457 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
23461 /* Create a value 'arg' as V[127:64]++E[63:0] */
23462 IRTemp arg
= newTemp(Ity_V128
);
23464 binop(Iop_SetV128lo64
,
23465 getXMMReg(rV
), mkexpr(e64
)));
23466 /* and apply op to it */
23467 putYMMRegLoAndZU( rG
, unop(op
, mkexpr(arg
)) );
23473 /* Lower 32-bit lane only AVX128 unary operation:
23474 G[31:0] = op(E[31:0])
23475 G[127:32] = V[127:32]
23477 The specified op must be of the 32F0x4 kind, so that it
23478 copies the upper 3/4 of the operand to the result.
23480 static Long
dis_AVX128_E_V_to_G_lo32_unary ( /*OUT*/Bool
* uses_vvvv
,
23481 const VexAbiInfo
* vbi
,
23482 Prefix pfx
, Long delta
,
23483 const HChar
* opname
, IROp op
)
23488 UChar rm
= getUChar(delta
);
23489 UInt rG
= gregOfRexRM(pfx
,rm
);
23490 UInt rV
= getVexNvvvv(pfx
);
23491 IRTemp e32
= newTemp(Ity_I32
);
23493 /* Fetch E[31:0] */
23494 if (epartIsReg(rm
)) {
23495 UInt rE
= eregOfRexRM(pfx
,rm
);
23496 assign(e32
, getXMMRegLane32(rE
, 0));
23497 DIP("%s %s,%s,%s\n", opname
,
23498 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
23501 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23502 assign(e32
, loadLE(Ity_I32
, mkexpr(addr
)));
23503 DIP("%s %s,%s,%s\n", opname
,
23504 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
23508 /* Create a value 'arg' as V[127:32]++E[31:0] */
23509 IRTemp arg
= newTemp(Ity_V128
);
23511 binop(Iop_SetV128lo32
,
23512 getXMMReg(rV
), mkexpr(e32
)));
23513 /* and apply op to it */
23514 putYMMRegLoAndZU( rG
, unop(op
, mkexpr(arg
)) );
23520 /* Lower 32-bit lane only AVX128 binary operation:
23521 G[31:0] = V[31:0] `op` E[31:0]
23522 G[127:32] = V[127:32]
23524 The specified op must be of the 32F0x4 kind, so that it
23525 copies the upper 3/4 of the left operand to the result.
23527 static Long
dis_AVX128_E_V_to_G_lo32 ( /*OUT*/Bool
* uses_vvvv
,
23528 const VexAbiInfo
* vbi
,
23529 Prefix pfx
, Long delta
,
23530 const HChar
* opname
, IROp op
)
23535 UChar rm
= getUChar(delta
);
23536 UInt rG
= gregOfRexRM(pfx
,rm
);
23537 UInt rV
= getVexNvvvv(pfx
);
23538 IRExpr
* vpart
= getXMMReg(rV
);
23539 if (epartIsReg(rm
)) {
23540 UInt rE
= eregOfRexRM(pfx
,rm
);
23541 putXMMReg( rG
, binop(op
, vpart
, getXMMReg(rE
)) );
23542 DIP("%s %s,%s,%s\n", opname
,
23543 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
23546 /* We can only do a 32-bit memory read, so the upper 3/4 of the
23547 E operand needs to be made simply of zeroes. */
23548 IRTemp epart
= newTemp(Ity_V128
);
23549 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23550 assign( epart
, unop( Iop_32UtoV128
,
23551 loadLE(Ity_I32
, mkexpr(addr
))) );
23552 putXMMReg( rG
, binop(op
, vpart
, mkexpr(epart
)) );
23553 DIP("%s %s,%s,%s\n", opname
,
23554 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
23555 delta
= delta
+alen
;
23557 putYMMRegLane128( rG
, 1, mkV128(0) );
23563 /* All-lanes AVX128 binary operation:
23564 G[127:0] = V[127:0] `op` E[127:0]
23567 static Long
dis_AVX128_E_V_to_G ( /*OUT*/Bool
* uses_vvvv
,
23568 const VexAbiInfo
* vbi
,
23569 Prefix pfx
, Long delta
,
23570 const HChar
* opname
, IROp op
)
23572 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
23573 uses_vvvv
, vbi
, pfx
, delta
, opname
, op
,
23574 NULL
, False
/*!invertLeftArg*/, False
/*!swapArgs*/
23579 /* Handles AVX128 32F/64F comparisons. A derivative of
23580 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
23581 original delta to indicate failure. */
23583 Long
dis_AVX128_cmp_V_E_to_G ( /*OUT*/Bool
* uses_vvvv
,
23584 const VexAbiInfo
* vbi
,
23585 Prefix pfx
, Long delta
,
23586 const HChar
* opname
, Bool all_lanes
, Int sz
)
23588 vassert(sz
== 4 || sz
== 8);
23589 Long deltaIN
= delta
;
23594 Bool preZero
= False
;
23595 Bool preSwap
= False
;
23596 IROp op
= Iop_INVALID
;
23597 Bool postNot
= False
;
23598 IRTemp plain
= newTemp(Ity_V128
);
23599 UChar rm
= getUChar(delta
);
23600 UInt rG
= gregOfRexRM(pfx
, rm
);
23601 UInt rV
= getVexNvvvv(pfx
);
23602 IRTemp argL
= newTemp(Ity_V128
);
23603 IRTemp argR
= newTemp(Ity_V128
);
23605 assign(argL
, getXMMReg(rV
));
23606 if (epartIsReg(rm
)) {
23607 imm8
= getUChar(delta
+1);
23608 Bool ok
= findSSECmpOp(&preZero
, &preSwap
, &op
, &postNot
,
23609 imm8
, all_lanes
, sz
);
23610 if (!ok
) return deltaIN
; /* FAIL */
23611 UInt rE
= eregOfRexRM(pfx
,rm
);
23612 assign(argR
, getXMMReg(rE
));
23614 DIP("%s $%u,%s,%s,%s\n",
23616 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
23618 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
23619 imm8
= getUChar(delta
+alen
);
23620 Bool ok
= findSSECmpOp(&preZero
, &preSwap
, &op
, &postNot
,
23621 imm8
, all_lanes
, sz
);
23622 if (!ok
) return deltaIN
; /* FAIL */
23624 all_lanes
? loadLE(Ity_V128
, mkexpr(addr
))
23625 : sz
== 8 ? unop( Iop_64UtoV128
, loadLE(Ity_I64
, mkexpr(addr
)))
23626 : /*sz==4*/ unop( Iop_32UtoV128
, loadLE(Ity_I32
, mkexpr(addr
))));
23628 DIP("%s $%u,%s,%s,%s\n",
23629 opname
, imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
23632 IRTemp argMask
= newTemp(Ity_V128
);
23634 // In this case, preSwap is irrelevant, but it's harmless to honour it
23636 assign(argMask
, mkV128(all_lanes
? 0x0000 : (sz
==4 ? 0xFFF0 : 0xFF00)));
23638 assign(argMask
, mkV128(0xFFFF));
23643 preSwap
? binop(op
, binop(Iop_AndV128
, mkexpr(argR
), mkexpr(argMask
)),
23644 binop(Iop_AndV128
, mkexpr(argL
), mkexpr(argMask
)))
23645 : binop(op
, binop(Iop_AndV128
, mkexpr(argL
), mkexpr(argMask
)),
23646 binop(Iop_AndV128
, mkexpr(argR
), mkexpr(argMask
)))
23650 /* This is simple: just invert the result, if necessary, and
23653 putYMMRegLoAndZU( rG
, unop(Iop_NotV128
, mkexpr(plain
)) );
23655 putYMMRegLoAndZU( rG
, mkexpr(plain
) );
23660 /* More complex. It's a one-lane-only, hence need to possibly
23661 invert only that one lane. But at least the other lanes are
23662 correctly "in" the result, having been copied from the left
23665 IRExpr
* mask
= mkV128(sz
==4 ? 0x000F : 0x00FF);
23666 putYMMRegLoAndZU( rG
, binop(Iop_XorV128
, mkexpr(plain
),
23669 putYMMRegLoAndZU( rG
, mkexpr(plain
) );
23673 /* This is the most complex case. One-lane-only, but the args
23674 were swapped. So we have to possibly invert the bottom lane,
23675 and (definitely) we have to copy the upper lane(s) from argL
23676 since, due to the swapping, what's currently there is from
23677 argR, which is not correct. */
23678 IRTemp res
= newTemp(Ity_V128
);
23679 IRTemp mask
= newTemp(Ity_V128
);
23680 IRTemp notMask
= newTemp(Ity_V128
);
23681 assign(mask
, mkV128(sz
==4 ? 0x000F : 0x00FF));
23682 assign(notMask
, mkV128(sz
==4 ? 0xFFF0 : 0xFF00));
23687 unop(Iop_NotV128
, mkexpr(plain
)),
23689 binop(Iop_AndV128
, mkexpr(argL
), mkexpr(notMask
))));
23696 binop(Iop_AndV128
, mkexpr(argL
), mkexpr(notMask
))));
23698 putYMMRegLoAndZU( rG
, mkexpr(res
) );
23706 /* Handles AVX256 32F/64F comparisons. A derivative of
23707 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
23708 original delta to indicate failure. */
23710 Long
dis_AVX256_cmp_V_E_to_G ( /*OUT*/Bool
* uses_vvvv
,
23711 const VexAbiInfo
* vbi
,
23712 Prefix pfx
, Long delta
,
23713 const HChar
* opname
, Int sz
)
23715 vassert(sz
== 4 || sz
== 8);
23716 Long deltaIN
= delta
;
23721 Bool preZero
= False
;
23722 Bool preSwap
= False
;
23723 IROp op
= Iop_INVALID
;
23724 Bool postNot
= False
;
23725 IRTemp plain
= newTemp(Ity_V256
);
23726 UChar rm
= getUChar(delta
);
23727 UInt rG
= gregOfRexRM(pfx
, rm
);
23728 UInt rV
= getVexNvvvv(pfx
);
23729 IRTemp argL
= newTemp(Ity_V256
);
23730 IRTemp argR
= newTemp(Ity_V256
);
23731 IRTemp argLhi
= IRTemp_INVALID
;
23732 IRTemp argLlo
= IRTemp_INVALID
;
23733 IRTemp argRhi
= IRTemp_INVALID
;
23734 IRTemp argRlo
= IRTemp_INVALID
;
23736 assign(argL
, getYMMReg(rV
));
23737 if (epartIsReg(rm
)) {
23738 imm8
= getUChar(delta
+1);
23739 Bool ok
= findSSECmpOp(&preZero
, &preSwap
, &op
, &postNot
, imm8
,
23740 True
/*all_lanes*/, sz
);
23741 if (!ok
) return deltaIN
; /* FAIL */
23742 UInt rE
= eregOfRexRM(pfx
,rm
);
23743 assign(argR
, getYMMReg(rE
));
23745 DIP("%s $%u,%s,%s,%s\n",
23747 nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
23749 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
23750 imm8
= getUChar(delta
+alen
);
23751 Bool ok
= findSSECmpOp(&preZero
, &preSwap
, &op
, &postNot
, imm8
,
23752 True
/*all_lanes*/, sz
);
23753 if (!ok
) return deltaIN
; /* FAIL */
23754 assign(argR
, loadLE(Ity_V256
, mkexpr(addr
)) );
23756 DIP("%s $%u,%s,%s,%s\n",
23757 opname
, imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
23760 breakupV256toV128s( preSwap
? argR
: argL
, &argLhi
, &argLlo
);
23761 breakupV256toV128s( preSwap
? argL
: argR
, &argRhi
, &argRlo
);
23763 IRTemp argMask
= newTemp(Ity_V128
);
23765 // In this case, preSwap is irrelevant, but it's harmless to honour it
23767 assign(argMask
, mkV128(0x0000));
23769 assign(argMask
, mkV128(0xFFFF));
23774 binop( Iop_V128HLtoV256
,
23775 binop(op
, binop(Iop_AndV128
, mkexpr(argLhi
), mkexpr(argMask
)),
23776 binop(Iop_AndV128
, mkexpr(argRhi
), mkexpr(argMask
))),
23777 binop(op
, binop(Iop_AndV128
, mkexpr(argLlo
), mkexpr(argMask
)),
23778 binop(Iop_AndV128
, mkexpr(argRlo
), mkexpr(argMask
))))
23781 /* This is simple: just invert the result, if necessary, and
23784 putYMMReg( rG
, unop(Iop_NotV256
, mkexpr(plain
)) );
23786 putYMMReg( rG
, mkexpr(plain
) );
23794 /* Handles AVX128 unary E-to-G all-lanes operations. */
23796 Long
dis_AVX128_E_to_G_unary ( /*OUT*/Bool
* uses_vvvv
,
23797 const VexAbiInfo
* vbi
,
23798 Prefix pfx
, Long delta
,
23799 const HChar
* opname
,
23800 IRTemp (*opFn
)(IRTemp
) )
23805 IRTemp res
= newTemp(Ity_V128
);
23806 IRTemp arg
= newTemp(Ity_V128
);
23807 UChar rm
= getUChar(delta
);
23808 UInt rG
= gregOfRexRM(pfx
, rm
);
23809 if (epartIsReg(rm
)) {
23810 UInt rE
= eregOfRexRM(pfx
,rm
);
23811 assign(arg
, getXMMReg(rE
));
23813 DIP("%s %s,%s\n", opname
, nameXMMReg(rE
), nameXMMReg(rG
));
23815 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23816 assign(arg
, loadLE(Ity_V128
, mkexpr(addr
)));
23818 DIP("%s %s,%s\n", opname
, dis_buf
, nameXMMReg(rG
));
23821 putYMMRegLoAndZU( rG
, mkexpr(res
) );
23822 *uses_vvvv
= False
;
23827 /* Handles AVX128 unary E-to-G all-lanes operations. */
23829 Long
dis_AVX128_E_to_G_unary_all ( /*OUT*/Bool
* uses_vvvv
,
23830 const VexAbiInfo
* vbi
,
23831 Prefix pfx
, Long delta
,
23832 const HChar
* opname
, IROp op
)
23837 IRTemp arg
= newTemp(Ity_V128
);
23838 UChar rm
= getUChar(delta
);
23839 UInt rG
= gregOfRexRM(pfx
, rm
);
23840 if (epartIsReg(rm
)) {
23841 UInt rE
= eregOfRexRM(pfx
,rm
);
23842 assign(arg
, getXMMReg(rE
));
23844 DIP("%s %s,%s\n", opname
, nameXMMReg(rE
), nameXMMReg(rG
));
23846 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23847 assign(arg
, loadLE(Ity_V128
, mkexpr(addr
)));
23849 DIP("%s %s,%s\n", opname
, dis_buf
, nameXMMReg(rG
));
23851 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
23852 // up in the usual way.
23853 Bool needsIRRM
= op
== Iop_Sqrt32Fx4
|| op
== Iop_Sqrt64Fx2
;
23854 /* XXXROUNDINGFIXME */
23855 IRExpr
* res
= needsIRRM
? binop(op
, get_FAKE_roundingmode(), mkexpr(arg
))
23856 : unop(op
, mkexpr(arg
));
23857 putYMMRegLoAndZU( rG
, res
);
23858 *uses_vvvv
= False
;
23863 /* FIXME: common up with the _128_ version above? */
23865 Long
dis_VEX_NDS_256_AnySimdPfx_0F_WIG (
23866 /*OUT*/Bool
* uses_vvvv
, const VexAbiInfo
* vbi
,
23867 Prefix pfx
, Long delta
, const HChar
* name
,
23868 /* The actual operation. Use either 'op' or 'opfn',
23870 IROp op
, IRTemp(*opFn
)(IRTemp
,IRTemp
),
23871 Bool invertLeftArg
,
23875 UChar modrm
= getUChar(delta
);
23876 UInt rD
= gregOfRexRM(pfx
, modrm
);
23877 UInt rSL
= getVexNvvvv(pfx
);
23878 IRTemp tSL
= newTemp(Ity_V256
);
23879 IRTemp tSR
= newTemp(Ity_V256
);
23880 IRTemp addr
= IRTemp_INVALID
;
23883 vassert(1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*WIG?*/);
23885 assign(tSL
, invertLeftArg
? unop(Iop_NotV256
, getYMMReg(rSL
))
23888 if (epartIsReg(modrm
)) {
23889 UInt rSR
= eregOfRexRM(pfx
, modrm
);
23891 assign(tSR
, getYMMReg(rSR
));
23892 DIP("%s %s,%s,%s\n",
23893 name
, nameYMMReg(rSR
), nameYMMReg(rSL
), nameYMMReg(rD
));
23895 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23897 assign(tSR
, loadLE(Ity_V256
, mkexpr(addr
)));
23898 DIP("%s %s,%s,%s\n",
23899 name
, dis_buf
, nameYMMReg(rSL
), nameYMMReg(rD
));
23902 IRTemp res
= IRTemp_INVALID
;
23903 if (op
!= Iop_INVALID
) {
23904 vassert(opFn
== NULL
);
23905 res
= newTemp(Ity_V256
);
23906 if (requiresRMode(op
)) {
23907 IRTemp rm
= newTemp(Ity_I32
);
23908 assign(rm
, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
23909 assign(res
, swapArgs
23910 ? triop(op
, mkexpr(rm
), mkexpr(tSR
), mkexpr(tSL
))
23911 : triop(op
, mkexpr(rm
), mkexpr(tSL
), mkexpr(tSR
)));
23913 assign(res
, swapArgs
23914 ? binop(op
, mkexpr(tSR
), mkexpr(tSL
))
23915 : binop(op
, mkexpr(tSL
), mkexpr(tSR
)));
23918 vassert(opFn
!= NULL
);
23919 res
= swapArgs
? opFn(tSR
, tSL
) : opFn(tSL
, tSR
);
23922 putYMMReg(rD
, mkexpr(res
));
23929 /* All-lanes AVX256 binary operation:
23930 G[255:0] = V[255:0] `op` E[255:0]
23932 static Long
dis_AVX256_E_V_to_G ( /*OUT*/Bool
* uses_vvvv
,
23933 const VexAbiInfo
* vbi
,
23934 Prefix pfx
, Long delta
,
23935 const HChar
* opname
, IROp op
)
23937 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23938 uses_vvvv
, vbi
, pfx
, delta
, opname
, op
,
23939 NULL
, False
/*!invertLeftArg*/, False
/*!swapArgs*/
23944 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, with a simple IROp
23945 for the operation, no inversion of the left arg, and no swapping of
23948 Long
dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple (
23949 /*OUT*/Bool
* uses_vvvv
, const VexAbiInfo
* vbi
,
23950 Prefix pfx
, Long delta
, const HChar
* name
,
23954 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23955 uses_vvvv
, vbi
, pfx
, delta
, name
, op
, NULL
, False
, False
);
23959 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, using the given IR
23960 generator to compute the result, no inversion of the left
23961 arg, and no swapping of args. */
23963 Long
dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex (
23964 /*OUT*/Bool
* uses_vvvv
, const VexAbiInfo
* vbi
,
23965 Prefix pfx
, Long delta
, const HChar
* name
,
23966 IRTemp(*opFn
)(IRTemp
,IRTemp
)
23969 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23970 uses_vvvv
, vbi
, pfx
, delta
, name
,
23971 Iop_INVALID
, opFn
, False
, False
);
23975 /* Handles AVX256 unary E-to-G all-lanes operations. */
23977 Long
dis_AVX256_E_to_G_unary ( /*OUT*/Bool
* uses_vvvv
,
23978 const VexAbiInfo
* vbi
,
23979 Prefix pfx
, Long delta
,
23980 const HChar
* opname
,
23981 IRTemp (*opFn
)(IRTemp
) )
23986 IRTemp res
= newTemp(Ity_V256
);
23987 IRTemp arg
= newTemp(Ity_V256
);
23988 UChar rm
= getUChar(delta
);
23989 UInt rG
= gregOfRexRM(pfx
, rm
);
23990 if (epartIsReg(rm
)) {
23991 UInt rE
= eregOfRexRM(pfx
,rm
);
23992 assign(arg
, getYMMReg(rE
));
23994 DIP("%s %s,%s\n", opname
, nameYMMReg(rE
), nameYMMReg(rG
));
23996 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23997 assign(arg
, loadLE(Ity_V256
, mkexpr(addr
)));
23999 DIP("%s %s,%s\n", opname
, dis_buf
, nameYMMReg(rG
));
24002 putYMMReg( rG
, mkexpr(res
) );
24003 *uses_vvvv
= False
;
24008 /* Handles AVX256 unary E-to-G all-lanes operations. */
24010 Long
dis_AVX256_E_to_G_unary_all ( /*OUT*/Bool
* uses_vvvv
,
24011 const VexAbiInfo
* vbi
,
24012 Prefix pfx
, Long delta
,
24013 const HChar
* opname
, IROp op
)
24018 IRTemp arg
= newTemp(Ity_V256
);
24019 UChar rm
= getUChar(delta
);
24020 UInt rG
= gregOfRexRM(pfx
, rm
);
24021 if (epartIsReg(rm
)) {
24022 UInt rE
= eregOfRexRM(pfx
,rm
);
24023 assign(arg
, getYMMReg(rE
));
24025 DIP("%s %s,%s\n", opname
, nameYMMReg(rE
), nameYMMReg(rG
));
24027 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24028 assign(arg
, loadLE(Ity_V256
, mkexpr(addr
)));
24030 DIP("%s %s,%s\n", opname
, dis_buf
, nameYMMReg(rG
));
24032 putYMMReg( rG
, unop(op
, mkexpr(arg
)) );
24033 *uses_vvvv
= False
;
24038 /* The use of ReinterpF64asI64 is ugly. Surely could do better if we
24039 had a variant of Iop_64x4toV256 that took F64s as args instead. */
24040 static Long
dis_CVTDQ2PD_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
24043 IRTemp addr
= IRTemp_INVALID
;
24046 UChar modrm
= getUChar(delta
);
24047 IRTemp sV
= newTemp(Ity_V128
);
24048 UInt rG
= gregOfRexRM(pfx
,modrm
);
24049 if (epartIsReg(modrm
)) {
24050 UInt rE
= eregOfRexRM(pfx
,modrm
);
24051 assign( sV
, getXMMReg(rE
) );
24053 DIP("vcvtdq2pd %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
24055 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24056 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
24058 DIP("vcvtdq2pd %s,%s\n", dis_buf
, nameYMMReg(rG
) );
24060 IRTemp s3
, s2
, s1
, s0
;
24061 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
24062 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
24066 unop(Iop_ReinterpF64asI64
, unop(Iop_I32StoF64
, mkexpr(s3
))),
24067 unop(Iop_ReinterpF64asI64
, unop(Iop_I32StoF64
, mkexpr(s2
))),
24068 unop(Iop_ReinterpF64asI64
, unop(Iop_I32StoF64
, mkexpr(s1
))),
24069 unop(Iop_ReinterpF64asI64
, unop(Iop_I32StoF64
, mkexpr(s0
)))
24071 putYMMReg(rG
, res
);
24076 static Long
dis_CVTPD2PS_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
24079 IRTemp addr
= IRTemp_INVALID
;
24082 UChar modrm
= getUChar(delta
);
24083 UInt rG
= gregOfRexRM(pfx
,modrm
);
24084 IRTemp argV
= newTemp(Ity_V256
);
24085 IRTemp rmode
= newTemp(Ity_I32
);
24086 if (epartIsReg(modrm
)) {
24087 UInt rE
= eregOfRexRM(pfx
,modrm
);
24088 assign( argV
, getYMMReg(rE
) );
24090 DIP("vcvtpd2psy %s,%s\n", nameYMMReg(rE
), nameXMMReg(rG
));
24092 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24093 assign( argV
, loadLE(Ity_V256
, mkexpr(addr
)) );
24095 DIP("vcvtpd2psy %s,%s\n", dis_buf
, nameXMMReg(rG
) );
24098 assign( rmode
, get_sse_roundingmode() );
24099 IRTemp t3
, t2
, t1
, t0
;
24100 t3
= t2
= t1
= t0
= IRTemp_INVALID
;
24101 breakupV256to64s( argV
, &t3
, &t2
, &t1
, &t0
);
24102 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), \
24103 unop(Iop_ReinterpI64asF64, mkexpr(_t)) )
24104 putXMMRegLane32F( rG
, 3, CVT(t3
) );
24105 putXMMRegLane32F( rG
, 2, CVT(t2
) );
24106 putXMMRegLane32F( rG
, 1, CVT(t1
) );
24107 putXMMRegLane32F( rG
, 0, CVT(t0
) );
24109 putYMMRegLane128( rG
, 1, mkV128(0) );
static IRTemp math_VPUNPCK_YMM ( IRTemp tL, IRTemp tR, IROp op )
{
   IRTemp tLhi, tLlo, tRhi, tRlo;
   tLhi = tLlo = tRhi = tRlo = IRTemp_INVALID;
   IRTemp res = newTemp(Ity_V256);
   breakupV256toV128s( tL, &tLhi, &tLlo );
   breakupV256toV128s( tR, &tRhi, &tRlo );
   assign( res, binop( Iop_V128HLtoV256,
                       binop( op, mkexpr(tRhi), mkexpr(tLhi) ),
                       binop( op, mkexpr(tRlo), mkexpr(tLlo) ) ) );
   return res;
}

static IRTemp math_VPUNPCKLBW_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO8x16 );
}

static IRTemp math_VPUNPCKLWD_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO16x8 );
}

static IRTemp math_VPUNPCKLDQ_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO32x4 );
}

static IRTemp math_VPUNPCKLQDQ_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO64x2 );
}

static IRTemp math_VPUNPCKHBW_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI8x16 );
}

static IRTemp math_VPUNPCKHWD_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI16x8 );
}

static IRTemp math_VPUNPCKHDQ_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI32x4 );
}

static IRTemp math_VPUNPCKHQDQ_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI64x2 );
}

static IRTemp math_VPACKSSWB_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Sx16 );
}

static IRTemp math_VPACKUSWB_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Ux16 );
}

static IRTemp math_VPACKSSDW_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Sx8 );
}

static IRTemp math_VPACKUSDW_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Ux8 );
}
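
/* A note on the helpers above: the 256-bit forms of PUNPCK* and PACK*
   operate independently on each 128-bit lane of the YMM registers, so
   math_VPUNPCK_YMM just splits both sources with breakupV256toV128s,
   applies the corresponding 128-bit IROp to each half, and glues the
   halves back together with Iop_V128HLtoV256. */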
24200 __attribute__((noinline
))
24202 Long
dis_ESC_0F__VEX (
24203 /*MB_OUT*/DisResult
* dres
,
24204 /*OUT*/ Bool
* uses_vvvv
,
24205 const VexArchInfo
* archinfo
,
24206 const VexAbiInfo
* vbi
,
24207 Prefix pfx
, Int sz
, Long deltaIN
24210 IRTemp addr
= IRTemp_INVALID
;
24213 Long delta
= deltaIN
;
24214 UChar opc
= getUChar(delta
);
24216 *uses_vvvv
= False
;
24221 /* VMOVSD m64, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
24222 /* Move 64 bits from E (mem only) to G (lo half xmm).
24223 Bits 255-64 of the dest are zeroed out. */
24224 if (haveF2no66noF3(pfx
) && !epartIsReg(getUChar(delta
))) {
24225 UChar modrm
= getUChar(delta
);
24226 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24227 UInt rG
= gregOfRexRM(pfx
,modrm
);
24228 IRTemp z128
= newTemp(Ity_V128
);
24229 assign(z128
, mkV128(0));
24230 putXMMReg( rG
, mkexpr(z128
) );
24231 /* FIXME: ALIGNMENT CHECK? */
24232 putXMMRegLane64( rG
, 0, loadLE(Ity_I64
, mkexpr(addr
)) );
24233 putYMMRegLane128( rG
, 1, mkexpr(z128
) );
24234 DIP("vmovsd %s,%s\n", dis_buf
, nameXMMReg(rG
));
24236 goto decode_success
;
24238 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
24240 if (haveF2no66noF3(pfx
) && epartIsReg(getUChar(delta
))) {
24241 UChar modrm
= getUChar(delta
);
24242 UInt rG
= gregOfRexRM(pfx
, modrm
);
24243 UInt rE
= eregOfRexRM(pfx
, modrm
);
24244 UInt rV
= getVexNvvvv(pfx
);
24246 DIP("vmovsd %s,%s,%s\n",
24247 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
24248 IRTemp res
= newTemp(Ity_V128
);
24249 assign(res
, binop(Iop_64HLtoV128
,
24250 getXMMRegLane64(rV
, 1),
24251 getXMMRegLane64(rE
, 0)));
24252 putYMMRegLoAndZU(rG
, mkexpr(res
));
24254 goto decode_success
;
24256 /* VMOVSS m32, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
24257 /* Move 32 bits from E (mem only) to G (lo half xmm).
24258 Bits 255-32 of the dest are zeroed out. */
24259 if (haveF3no66noF2(pfx
) && !epartIsReg(getUChar(delta
))) {
24260 UChar modrm
= getUChar(delta
);
24261 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24262 UInt rG
= gregOfRexRM(pfx
,modrm
);
24263 IRTemp z128
= newTemp(Ity_V128
);
24264 assign(z128
, mkV128(0));
24265 putXMMReg( rG
, mkexpr(z128
) );
24266 /* FIXME: ALIGNMENT CHECK? */
24267 putXMMRegLane32( rG
, 0, loadLE(Ity_I32
, mkexpr(addr
)) );
24268 putYMMRegLane128( rG
, 1, mkexpr(z128
) );
24269 DIP("vmovss %s,%s\n", dis_buf
, nameXMMReg(rG
));
24271 goto decode_success
;
24273 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
24275 if (haveF3no66noF2(pfx
) && epartIsReg(getUChar(delta
))) {
24276 UChar modrm
= getUChar(delta
);
24277 UInt rG
= gregOfRexRM(pfx
, modrm
);
24278 UInt rE
= eregOfRexRM(pfx
, modrm
);
24279 UInt rV
= getVexNvvvv(pfx
);
24281 DIP("vmovss %s,%s,%s\n",
24282 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
24283 IRTemp res
= newTemp(Ity_V128
);
24284 assign( res
, binop( Iop_64HLtoV128
,
24285 getXMMRegLane64(rV
, 1),
24286 binop(Iop_32HLto64
,
24287 getXMMRegLane32(rV
, 1),
24288 getXMMRegLane32(rE
, 0)) ) );
24289 putYMMRegLoAndZU(rG
, mkexpr(res
));
24291 goto decode_success
;
24293 /* VMOVUPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 10 /r */
24294 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24295 UChar modrm
= getUChar(delta
);
24296 UInt rG
= gregOfRexRM(pfx
, modrm
);
24297 if (epartIsReg(modrm
)) {
24298 UInt rE
= eregOfRexRM(pfx
,modrm
);
24299 putYMMRegLoAndZU( rG
, getXMMReg( rE
));
24300 DIP("vmovupd %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
24303 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24304 putYMMRegLoAndZU( rG
, loadLE(Ity_V128
, mkexpr(addr
)) );
24305 DIP("vmovupd %s,%s\n", dis_buf
, nameXMMReg(rG
));
24308 goto decode_success
;
24310 /* VMOVUPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 10 /r */
24311 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24312 UChar modrm
= getUChar(delta
);
24313 UInt rG
= gregOfRexRM(pfx
, modrm
);
24314 if (epartIsReg(modrm
)) {
24315 UInt rE
= eregOfRexRM(pfx
,modrm
);
24316 putYMMReg( rG
, getYMMReg( rE
));
24317 DIP("vmovupd %s,%s\n", nameYMMReg(rE
), nameYMMReg(rG
));
24320 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24321 putYMMReg( rG
, loadLE(Ity_V256
, mkexpr(addr
)) );
24322 DIP("vmovupd %s,%s\n", dis_buf
, nameYMMReg(rG
));
24325 goto decode_success
;
24327 /* VMOVUPS xmm2/m128, xmm1 = VEX.128.0F.WIG 10 /r */
24328 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24329 UChar modrm
= getUChar(delta
);
24330 UInt rG
= gregOfRexRM(pfx
, modrm
);
24331 if (epartIsReg(modrm
)) {
24332 UInt rE
= eregOfRexRM(pfx
,modrm
);
24333 putYMMRegLoAndZU( rG
, getXMMReg( rE
));
24334 DIP("vmovups %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
24337 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24338 putYMMRegLoAndZU( rG
, loadLE(Ity_V128
, mkexpr(addr
)) );
24339 DIP("vmovups %s,%s\n", dis_buf
, nameXMMReg(rG
));
24342 goto decode_success
;
24344 /* VMOVUPS ymm2/m256, ymm1 = VEX.256.0F.WIG 10 /r */
24345 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24346 UChar modrm
= getUChar(delta
);
24347 UInt rG
= gregOfRexRM(pfx
, modrm
);
24348 if (epartIsReg(modrm
)) {
24349 UInt rE
= eregOfRexRM(pfx
,modrm
);
24350 putYMMReg( rG
, getYMMReg( rE
));
24351 DIP("vmovups %s,%s\n", nameYMMReg(rE
), nameYMMReg(rG
));
24354 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24355 putYMMReg( rG
, loadLE(Ity_V256
, mkexpr(addr
)) );
24356 DIP("vmovups %s,%s\n", dis_buf
, nameYMMReg(rG
));
24359 goto decode_success
;
      /* VMOVSD xmm1, m64 = VEX.LIG.F2.0F.WIG 11 /r */
      /* Move 64 bits from G (low half xmm) to mem only. */
      if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         UInt rG = gregOfRexRM(pfx,modrm);
         /* FIXME: ALIGNMENT CHECK? */
         storeLE( mkexpr(addr), getXMMRegLane64(rG, 0));
         DIP("vmovsd %s,%s\n", nameXMMReg(rG), dis_buf);
         delta += alen;
         goto decode_success;
      }
      /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 11 /r */
      if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         UInt  rE    = eregOfRexRM(pfx, modrm);
         UInt  rV    = getVexNvvvv(pfx);
         delta++;
         DIP("vmovsd %s,%s,%s\n",
             nameXMMReg(rG), nameXMMReg(rV), nameXMMReg(rE));
         IRTemp res = newTemp(Ity_V128);
         assign(res, binop(Iop_64HLtoV128,
                           getXMMRegLane64(rV, 1),
                           getXMMRegLane64(rG, 0)));
         putYMMRegLoAndZU(rE, mkexpr(res));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VMOVSS xmm1, m32 = VEX.LIG.F3.0F.WIG 11 /r */
      /* Move 32 bits from G (low 1/4 xmm) to mem only. */
      if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         UInt rG = gregOfRexRM(pfx,modrm);
         /* FIXME: ALIGNMENT CHECK? */
         storeLE( mkexpr(addr), getXMMRegLane32(rG, 0));
         DIP("vmovss %s,%s\n", nameXMMReg(rG), dis_buf);
         delta += alen;
         goto decode_success;
      }
      /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 11 /r */
      if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         UInt  rE    = eregOfRexRM(pfx, modrm);
         UInt  rV    = getVexNvvvv(pfx);
         delta++;
         DIP("vmovss %s,%s,%s\n",
             nameXMMReg(rG), nameXMMReg(rV), nameXMMReg(rE));
         IRTemp res = newTemp(Ity_V128);
         assign( res, binop( Iop_64HLtoV128,
                             getXMMRegLane64(rV, 1),
                             binop(Iop_32HLto64,
                                   getXMMRegLane32(rV, 1),
                                   getXMMRegLane32(rG, 0)) ) );
         putYMMRegLoAndZU(rE, mkexpr(res));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VMOVUPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 11 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMRegLoAndZU( rE, getXMMReg(rG) );
            DIP("vmovupd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr), getXMMReg(rG) );
            DIP("vmovupd %s,%s\n", nameXMMReg(rG), dis_buf);
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVUPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 11 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMReg( rE, getYMMReg(rG) );
            DIP("vmovupd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr), getYMMReg(rG) );
            DIP("vmovupd %s,%s\n", nameYMMReg(rG), dis_buf);
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVUPS xmm1, xmm2/m128 = VEX.128.0F.WIG 11 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMRegLoAndZU( rE, getXMMReg(rG) );
            DIP("vmovups %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr), getXMMReg(rG) );
            DIP("vmovups %s,%s\n", nameXMMReg(rG), dis_buf);
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVUPS ymm1, ymm2/m256 = VEX.256.0F.WIG 11 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMReg( rE, getYMMReg(rG) );
            DIP("vmovups %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr), getYMMReg(rG) );
            DIP("vmovups %s,%s\n", nameYMMReg(rG), dis_buf);
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVDDUP xmm2/m64, xmm1 = VEX.128.F2.0F.WIG 12 /r */
      if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_MOVDDUP_128( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VMOVDDUP ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 12 /r */
      if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_MOVDDUP_256( vbi, pfx, delta );
         goto decode_success;
      }
      /* VMOVHLPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 12 /r */
      /* Insn only exists in reg form */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         UInt  rE    = eregOfRexRM(pfx, modrm);
         UInt  rV    = getVexNvvvv(pfx);
         delta++;
         DIP("vmovhlps %s,%s,%s\n",
             nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
         IRTemp res = newTemp(Ity_V128);
         assign(res, binop(Iop_64HLtoV128,
                           getXMMRegLane64(rV, 1),
                           getXMMRegLane64(rE, 1)));
         putYMMRegLoAndZU(rG, mkexpr(res));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VMOVLPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 12 /r */
      /* Insn exists only in mem form, it appears. */
      /* VMOVLPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 12 /r */
      /* Insn exists only in mem form, it appears. */
      if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
          && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         UInt  rV    = getVexNvvvv(pfx);
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         DIP("vmovlpd %s,%s,%s\n",
             dis_buf, nameXMMReg(rV), nameXMMReg(rG));
         IRTemp res = newTemp(Ity_V128);
         assign(res, binop(Iop_64HLtoV128,
                           getXMMRegLane64(rV, 1),
                           loadLE(Ity_I64, mkexpr(addr))));
         putYMMRegLoAndZU(rG, mkexpr(res));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VMOVSLDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 12 /r */
      if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
                                   True/*isL*/ );
         goto decode_success;
      }
      /* VMOVSLDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 12 /r */
      if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_MOVSxDUP_256( vbi, pfx, delta, True/*isL*/ );
         goto decode_success;
      }
      /* VMOVLPS xmm1, m64 = VEX.128.0F.WIG 13 /r */
      /* Insn exists only in mem form, it appears. */
      /* VMOVLPD xmm1, m64 = VEX.128.66.0F.WIG 13 /r */
      /* Insn exists only in mem form, it appears. */
      if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
          && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         storeLE( mkexpr(addr), getXMMRegLane64( rG, 0));
         DIP("vmovlpd %s,%s\n", nameXMMReg(rG), dis_buf);
         goto decode_success;
      }
      /* VUNPCKLPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 14 /r */
      /* VUNPCKHPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 15 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         Bool   hi    = opc == 0x15;
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx,modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp eV    = newTemp(Ity_V128);
         IRTemp vV    = newTemp(Ity_V128);
         assign( vV, getXMMReg(rV) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( eV, getXMMReg(rE) );
            delta += 1;
            DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
                nameXMMReg(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
                dis_buf, nameXMMReg(rG));
         }
         IRTemp res = math_UNPCKxPS_128( eV, vV, hi );
         putYMMRegLoAndZU( rG, mkexpr(res) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VUNPCKLPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 14 /r */
      /* VUNPCKHPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 15 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         Bool   hi    = opc == 0x15;
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx,modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp eV    = newTemp(Ity_V256);
         IRTemp vV    = newTemp(Ity_V256);
         assign( vV, getYMMReg(rV) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( eV, getYMMReg(rE) );
            delta += 1;
            DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
                nameYMMReg(rE), nameYMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
            delta += alen;
            DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
                dis_buf, nameYMMReg(rG));
         }
         IRTemp res = math_UNPCKxPS_256( eV, vV, hi );
         putYMMReg( rG, mkexpr(res) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VUNPCKLPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 14 /r */
      /* VUNPCKHPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 15 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         Bool   hi    = opc == 0x15;
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx,modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp eV    = newTemp(Ity_V128);
         IRTemp vV    = newTemp(Ity_V128);
         assign( vV, getXMMReg(rV) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( eV, getXMMReg(rE) );
            delta += 1;
            DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
                nameXMMReg(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
                dis_buf, nameXMMReg(rG));
         }
         IRTemp res = math_UNPCKxPD_128( eV, vV, hi );
         putYMMRegLoAndZU( rG, mkexpr(res) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VUNPCKLPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 14 /r */
      /* VUNPCKHPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 15 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         Bool   hi    = opc == 0x15;
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx,modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp eV    = newTemp(Ity_V256);
         IRTemp vV    = newTemp(Ity_V256);
         assign( vV, getYMMReg(rV) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( eV, getYMMReg(rE) );
            delta += 1;
            DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
                nameYMMReg(rE), nameYMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
            delta += alen;
            DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
                dis_buf, nameYMMReg(rG));
         }
         IRTemp res = math_UNPCKxPD_256( eV, vV, hi );
         putYMMReg( rG, mkexpr(res) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VMOVLHPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 16 /r */
      /* Insn only exists in reg form */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         UInt  rE    = eregOfRexRM(pfx, modrm);
         UInt  rV    = getVexNvvvv(pfx);
         delta++;
         DIP("vmovlhps %s,%s,%s\n",
             nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
         IRTemp res = newTemp(Ity_V128);
         assign(res, binop(Iop_64HLtoV128,
                           getXMMRegLane64(rE, 0),
                           getXMMRegLane64(rV, 0)));
         putYMMRegLoAndZU(rG, mkexpr(res));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VMOVHPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 16 /r */
      /* Insn exists only in mem form, it appears. */
      /* VMOVHPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 16 /r */
      /* Insn exists only in mem form, it appears. */
      if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
          && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         UInt  rV    = getVexNvvvv(pfx);
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         DIP("vmovhp%c %s,%s,%s\n", have66(pfx) ? 'd' : 's',
             dis_buf, nameXMMReg(rV), nameXMMReg(rG));
         IRTemp res = newTemp(Ity_V128);
         assign(res, binop(Iop_64HLtoV128,
                           loadLE(Ity_I64, mkexpr(addr)),
                           getXMMRegLane64(rV, 0)));
         putYMMRegLoAndZU(rG, mkexpr(res));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VMOVSHDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 16 /r */
      if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
                                   False/*!isL*/ );
         goto decode_success;
      }
      /* VMOVSHDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 16 /r */
      if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_MOVSxDUP_256( vbi, pfx, delta, False/*!isL*/ );
         goto decode_success;
      }
      /* VMOVHPS xmm1, m64 = VEX.128.0F.WIG 17 /r */
      /* Insn exists only in mem form, it appears. */
      /* VMOVHPD xmm1, m64 = VEX.128.66.0F.WIG 17 /r */
      /* Insn exists only in mem form, it appears. */
      if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
          && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         storeLE( mkexpr(addr), getXMMRegLane64( rG, 1));
         DIP("vmovhp%c %s,%s\n", have66(pfx) ? 'd' : 's',
             nameXMMReg(rG), dis_buf);
         goto decode_success;
      }
      /* VMOVAPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 28 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMRegLoAndZU( rG, getXMMReg( rE ));
            DIP("vmovapd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SIGNAL_if_not_16_aligned( vbi, addr );
            putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
            DIP("vmovapd %s,%s\n", dis_buf, nameXMMReg(rG));
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVAPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 28 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMReg( rG, getYMMReg( rE ));
            DIP("vmovapd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SIGNAL_if_not_32_aligned( vbi, addr );
            putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
            DIP("vmovapd %s,%s\n", dis_buf, nameYMMReg(rG));
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVAPS xmm2/m128, xmm1 = VEX.128.0F.WIG 28 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMRegLoAndZU( rG, getXMMReg( rE ));
            DIP("vmovaps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SIGNAL_if_not_16_aligned( vbi, addr );
            putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
            DIP("vmovaps %s,%s\n", dis_buf, nameXMMReg(rG));
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVAPS ymm2/m256, ymm1 = VEX.256.0F.WIG 28 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMReg( rG, getYMMReg( rE ));
            DIP("vmovaps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SIGNAL_if_not_32_aligned( vbi, addr );
            putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
            DIP("vmovaps %s,%s\n", dis_buf, nameYMMReg(rG));
            delta += alen;
         }
         goto decode_success;
      }
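      /* Unlike the unaligned VMOVUP* forms handled earlier, the
         aligned VMOVAP* forms call gen_SIGNAL_if_not_16_aligned or
         gen_SIGNAL_if_not_32_aligned on the effective address, so a
         misaligned memory operand is reported rather than silently
         accepted. */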
      /* VMOVAPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 29 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMRegLoAndZU( rE, getXMMReg(rG) );
            DIP("vmovapd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SIGNAL_if_not_16_aligned( vbi, addr );
            storeLE( mkexpr(addr), getXMMReg(rG) );
            DIP("vmovapd %s,%s\n", nameXMMReg(rG), dis_buf);
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVAPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 29 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMReg( rE, getYMMReg(rG) );
            DIP("vmovapd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SIGNAL_if_not_32_aligned( vbi, addr );
            storeLE( mkexpr(addr), getYMMReg(rG) );
            DIP("vmovapd %s,%s\n", nameYMMReg(rG), dis_buf);
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVAPS xmm1, xmm2/m128 = VEX.128.0F.WIG 29 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMRegLoAndZU( rE, getXMMReg(rG) );
            DIP("vmovaps %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
            delta += 1;
            goto decode_success;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SIGNAL_if_not_16_aligned( vbi, addr );
            storeLE( mkexpr(addr), getXMMReg(rG) );
            DIP("vmovaps %s,%s\n", nameXMMReg(rG), dis_buf);
            delta += alen;
            goto decode_success;
         }
      }
      /* VMOVAPS ymm1, ymm2/m256 = VEX.256.0F.WIG 29 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMReg( rE, getYMMReg(rG) );
            DIP("vmovaps %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
            delta += 1;
            goto decode_success;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SIGNAL_if_not_32_aligned( vbi, addr );
            storeLE( mkexpr(addr), getYMMReg(rG) );
            DIP("vmovaps %s,%s\n", nameYMMReg(rG), dis_buf);
            delta += alen;
            goto decode_success;
         }
      }
      IRTemp rmode = newTemp(Ity_I32);
      assign( rmode, get_sse_roundingmode() );
      /* VCVTSI2SD r/m32, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W0 2A /r */
      if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
         UChar  modrm = getUChar(delta);
         UInt   rV    = getVexNvvvv(pfx);
         UInt   rD    = gregOfRexRM(pfx, modrm);
         IRTemp arg32 = newTemp(Ity_I32);
         if (epartIsReg(modrm)) {
            UInt rS = eregOfRexRM(pfx,modrm);
            assign( arg32, getIReg32(rS) );
            delta += 1;
            DIP("vcvtsi2sdl %s,%s,%s\n",
                nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
            delta += alen;
            DIP("vcvtsi2sdl %s,%s,%s\n",
                dis_buf, nameXMMReg(rV), nameXMMReg(rD));
         }
         putXMMRegLane64F( rD, 0,
                           unop(Iop_I32StoF64, mkexpr(arg32)));
         putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
         putYMMRegLane128( rD, 1, mkV128(0) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VCVTSI2SD r/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W1 2A /r */
      if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
         UChar  modrm = getUChar(delta);
         UInt   rV    = getVexNvvvv(pfx);
         UInt   rD    = gregOfRexRM(pfx, modrm);
         IRTemp arg64 = newTemp(Ity_I64);
         if (epartIsReg(modrm)) {
            UInt rS = eregOfRexRM(pfx,modrm);
            assign( arg64, getIReg64(rS) );
            delta += 1;
            DIP("vcvtsi2sdq %s,%s,%s\n",
                nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("vcvtsi2sdq %s,%s,%s\n",
                dis_buf, nameXMMReg(rV), nameXMMReg(rD));
         }
         putXMMRegLane64F( rD, 0,
                           binop( Iop_I64StoF64,
                                  get_sse_roundingmode(),
                                  mkexpr(arg64)) );
         putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
         putYMMRegLane128( rD, 1, mkV128(0) );
         *uses_vvvv = True;
         goto decode_success;
      }
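      /* The W0 form above converts a 32-bit integer, which is always
         exactly representable as an F64, so Iop_I32StoF64 takes no
         rounding mode.  The W1 form converts a 64-bit integer, which
         may not be exactly representable, so Iop_I64StoF64 is given
         the current SSE rounding mode. */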
      /* VCVTSI2SS r/m64, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W1 2A /r */
      if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
         UChar  modrm = getUChar(delta);
         UInt   rV    = getVexNvvvv(pfx);
         UInt   rD    = gregOfRexRM(pfx, modrm);
         IRTemp arg64 = newTemp(Ity_I64);
         if (epartIsReg(modrm)) {
            UInt rS = eregOfRexRM(pfx,modrm);
            assign( arg64, getIReg64(rS) );
            delta += 1;
            DIP("vcvtsi2ssq %s,%s,%s\n",
                nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("vcvtsi2ssq %s,%s,%s\n",
                dis_buf, nameXMMReg(rV), nameXMMReg(rD));
         }
         putXMMRegLane32F( rD, 0,
                           binop(Iop_F64toF32,
                                 mkexpr(rmode),
                                 binop(Iop_I64StoF64, mkexpr(rmode),
                                       mkexpr(arg64)) ) );
         putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
         putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
         putYMMRegLane128( rD, 1, mkV128(0) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VCVTSI2SS r/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W0 2A /r */
      if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
         UChar  modrm = getUChar(delta);
         UInt   rV    = getVexNvvvv(pfx);
         UInt   rD    = gregOfRexRM(pfx, modrm);
         IRTemp arg32 = newTemp(Ity_I32);
         if (epartIsReg(modrm)) {
            UInt rS = eregOfRexRM(pfx,modrm);
            assign( arg32, getIReg32(rS) );
            delta += 1;
            DIP("vcvtsi2ssl %s,%s,%s\n",
                nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
            delta += alen;
            DIP("vcvtsi2ssl %s,%s,%s\n",
                dis_buf, nameXMMReg(rV), nameXMMReg(rD));
         }
         putXMMRegLane32F( rD, 0,
                           binop(Iop_F64toF32,
                                 mkexpr(rmode),
                                 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
         putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
         putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
         putYMMRegLane128( rD, 1, mkV128(0) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VMOVNTPD xmm1, m128 = VEX.128.66.0F.WIG 2B /r */
      /* VMOVNTPS xmm1, m128 = VEX.128.0F.WIG 2B /r */
      if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
          && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
         UChar  modrm = getUChar(delta);
         UInt   rS    = gregOfRexRM(pfx, modrm);
         IRTemp tS    = newTemp(Ity_V128);
         assign(tS, getXMMReg(rS));
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         gen_SIGNAL_if_not_16_aligned(vbi, addr);
         storeLE(mkexpr(addr), mkexpr(tS));
         DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
             nameXMMReg(rS), dis_buf);
         goto decode_success;
      }
      /* VMOVNTPD ymm1, m256 = VEX.256.66.0F.WIG 2B /r */
      /* VMOVNTPS ymm1, m256 = VEX.256.0F.WIG 2B /r */
      if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
          && 1==getVexL(pfx)/*256*/ && !epartIsReg(getUChar(delta))) {
         UChar  modrm = getUChar(delta);
         UInt   rS    = gregOfRexRM(pfx, modrm);
         IRTemp tS    = newTemp(Ity_V256);
         assign(tS, getYMMReg(rS));
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         gen_SIGNAL_if_not_32_aligned(vbi, addr);
         storeLE(mkexpr(addr), mkexpr(tS));
         DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
             nameYMMReg(rS), dis_buf);
         goto decode_success;
      }
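      /* The non-temporal hint of VMOVNTPS/VMOVNTPD is not modelled
         here; apart from the alignment check, the stores above are
         treated as ordinary 128-/256-bit stores. */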
      /* VCVTTSD2SI xmm1/m32, r32 = VEX.LIG.F2.0F.W0 2C /r */
      if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
         delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
         goto decode_success;
      }
      /* VCVTTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2C /r */
      if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
         delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
         goto decode_success;
      }
      /* VCVTTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2C /r */
      if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
         delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
         goto decode_success;
      }
      /* VCVTTSS2SI xmm1/m64, r64 = VEX.LIG.F3.0F.W1 2C /r */
      if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
         delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
         goto decode_success;
      }
      /* VCVTSD2SI xmm1/m32, r32 = VEX.LIG.F2.0F.W0 2D /r */
      if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
         delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
         goto decode_success;
      }
      /* VCVTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2D /r */
      if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
         delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
         goto decode_success;
      }
      /* VCVTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2D /r */
      if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
         delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
         goto decode_success;
      }
      /* VCVTSS2SI xmm1/m64, r64 = VEX.LIG.F3.0F.W1 2D /r */
      if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
         delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
         goto decode_success;
      }
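      /* Both the 0x2C and 0x2D groups delegate to dis_CVTxSD2SI /
         dis_CVTxSS2SI and pass the opcode byte through; presumably
         the helper uses it to choose truncation (0x2C) versus the
         current rounding mode (0x2D). */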
      /* VUCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2E /r */
      /* VCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2F /r */
      if (have66noF2noF3(pfx)) {
         delta = dis_COMISD( vbi, pfx, delta, True/*isAvx*/, opc );
         goto decode_success;
      }
      /* VUCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2E /r */
      /* VCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2F /r */
      if (haveNo66noF2noF3(pfx)) {
         delta = dis_COMISS( vbi, pfx, delta, True/*isAvx*/, opc );
         goto decode_success;
      }
      /* VMOVMSKPD xmm2, r32 = VEX.128.66.0F.WIG 50 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_MOVMSKPD_128( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VMOVMSKPD ymm2, r32 = VEX.256.66.0F.WIG 50 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_MOVMSKPD_256( vbi, pfx, delta );
         goto decode_success;
      }
      /* VMOVMSKPS xmm2, r32 = VEX.128.0F.WIG 50 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_MOVMSKPS_128( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VMOVMSKPS ymm2, r32 = VEX.256.0F.WIG 50 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_MOVMSKPS_256( vbi, pfx, delta );
         goto decode_success;
      }
      /* VSQRTSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 51 /r */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32_unary(
                    uses_vvvv, vbi, pfx, delta, "vsqrtss", Iop_Sqrt32F0x4 );
         goto decode_success;
      }
      /* VSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 51 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx4 );
         goto decode_success;
      }
      /* VSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 51 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx8 );
         goto decode_success;
      }
      /* VSQRTSD xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F2.0F.WIG 51 /r */
      if (haveF2no66noF3(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo64_unary(
                    uses_vvvv, vbi, pfx, delta, "vsqrtsd", Iop_Sqrt64F0x2 );
         goto decode_success;
      }
      /* VSQRTPD xmm2/m128(E), xmm1(G) = VEX.NDS.128.66.0F.WIG 51 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx2 );
         goto decode_success;
      }
      /* VSQRTPD ymm2/m256(E), ymm1(G) = VEX.NDS.256.66.0F.WIG 51 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx4 );
         goto decode_success;
      }
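      /* From here on, most of the packed and scalar FP arithmetic
         cases simply delegate to the dis_AVX128/256 E-to-G helpers,
         which (it appears) decode the E operand, advance delta and
         record the use of the vvvv register where one is involved. */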
      /* VRSQRTSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 52 /r */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32_unary(
                    uses_vvvv, vbi, pfx, delta, "vrsqrtss",
                    Iop_RSqrtEst32F0x4 );
         goto decode_success;
      }
      /* VRSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 52 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx4 );
         goto decode_success;
      }
      /* VRSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 52 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx8 );
         goto decode_success;
      }
      /* VRCPSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 53 /r */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32_unary(
                    uses_vvvv, vbi, pfx, delta, "vrcpss", Iop_RecipEst32F0x4 );
         goto decode_success;
      }
      /* VRCPPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 53 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx4 );
         goto decode_success;
      }
      /* VRCPPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 53 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx8 );
         goto decode_success;
      }
      /* VANDPD r/m, rV, r ::: r = rV & r/m */
      /* VANDPD = VEX.NDS.128.66.0F.WIG 54 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128 );
         goto decode_success;
      }
      /* VANDPD r/m, rV, r ::: r = rV & r/m */
      /* VANDPD = VEX.NDS.256.66.0F.WIG 54 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256 );
         goto decode_success;
      }
      /* VANDPS = VEX.NDS.128.0F.WIG 54 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128 );
         goto decode_success;
      }
      /* VANDPS = VEX.NDS.256.0F.WIG 54 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256 );
         goto decode_success;
      }
      /* VANDNPD r/m, rV, r ::: r = (not rV) & r/m */
      /* VANDNPD = VEX.NDS.128.66.0F.WIG 55 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128,
                    NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
         goto decode_success;
      }
      /* VANDNPD = VEX.NDS.256.66.0F.WIG 55 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256,
                    NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
         goto decode_success;
      }
      /* VANDNPS = VEX.NDS.128.0F.WIG 55 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128,
                    NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
         goto decode_success;
      }
      /* VANDNPS = VEX.NDS.256.0F.WIG 55 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256,
                    NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
         goto decode_success;
      }
      /* VORPD r/m, rV, r ::: r = rV | r/m */
      /* VORPD = VEX.NDS.128.66.0F.WIG 56 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV128 );
         goto decode_success;
      }
      /* VORPD r/m, rV, r ::: r = rV | r/m */
      /* VORPD = VEX.NDS.256.66.0F.WIG 56 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV256 );
         goto decode_success;
      }
      /* VORPS r/m, rV, r ::: r = rV | r/m */
      /* VORPS = VEX.NDS.128.0F.WIG 56 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV128 );
         goto decode_success;
      }
      /* VORPS r/m, rV, r ::: r = rV | r/m */
      /* VORPS = VEX.NDS.256.0F.WIG 56 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV256 );
         goto decode_success;
      }
      /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
      /* VXORPD = VEX.NDS.128.66.0F.WIG 57 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV128 );
         goto decode_success;
      }
      /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
      /* VXORPD = VEX.NDS.256.66.0F.WIG 57 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV256 );
         goto decode_success;
      }
      /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
      /* VXORPS = VEX.NDS.128.0F.WIG 57 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV128 );
         goto decode_success;
      }
      /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
      /* VXORPS = VEX.NDS.256.0F.WIG 57 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV256 );
         goto decode_success;
      }
      /* VADDSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 58 /r */
      if (haveF2no66noF3(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo64(
                    uses_vvvv, vbi, pfx, delta, "vaddsd", Iop_Add64F0x2 );
         goto decode_success;
      }
      /* VADDSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 58 /r */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32(
                    uses_vvvv, vbi, pfx, delta, "vaddss", Iop_Add32F0x4 );
         goto decode_success;
      }
      /* VADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 58 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx4 );
         goto decode_success;
      }
      /* VADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 58 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx8 );
         goto decode_success;
      }
      /* VADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 58 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx2 );
         goto decode_success;
      }
      /* VADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 58 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx4 );
         goto decode_success;
      }
      /* VMULSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 59 /r */
      if (haveF2no66noF3(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo64(
                    uses_vvvv, vbi, pfx, delta, "vmulsd", Iop_Mul64F0x2 );
         goto decode_success;
      }
      /* VMULSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 59 /r */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32(
                    uses_vvvv, vbi, pfx, delta, "vmulss", Iop_Mul32F0x4 );
         goto decode_success;
      }
      /* VMULPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 59 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx4 );
         goto decode_success;
      }
      /* VMULPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 59 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx8 );
         goto decode_success;
      }
      /* VMULPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 59 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx2 );
         goto decode_success;
      }
      /* VMULPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 59 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx4 );
         goto decode_success;
      }
      /* VCVTPS2PD xmm2/m64, xmm1 = VEX.128.0F.WIG 5A /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_CVTPS2PD_128( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VCVTPS2PD xmm2/m128, ymm1 = VEX.256.0F.WIG 5A /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_CVTPS2PD_256( vbi, pfx, delta );
         goto decode_success;
      }
      /* VCVTPD2PS xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5A /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_CVTPD2PS_128( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VCVTPD2PS ymm2/m256, xmm1 = VEX.256.66.0F.WIG 5A /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_CVTPD2PS_256( vbi, pfx, delta );
         goto decode_success;
      }
      /* VCVTSD2SS xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5A /r */
      if (haveF2no66noF3(pfx)) {
         UChar  modrm = getUChar(delta);
         UInt   rV    = getVexNvvvv(pfx);
         UInt   rD    = gregOfRexRM(pfx, modrm);
         IRTemp f64lo = newTemp(Ity_F64);
         IRTemp rmode = newTemp(Ity_I32);
         assign( rmode, get_sse_roundingmode() );
         if (epartIsReg(modrm)) {
            UInt rS = eregOfRexRM(pfx,modrm);
            assign(f64lo, getXMMRegLane64F(rS, 0));
            delta += 1;
            DIP("vcvtsd2ss %s,%s,%s\n",
                nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign(f64lo, loadLE(Ity_F64, mkexpr(addr)) );
            delta += alen;
            DIP("vcvtsd2ss %s,%s,%s\n",
                dis_buf, nameXMMReg(rV), nameXMMReg(rD));
         }
         putXMMRegLane32F( rD, 0,
                           binop( Iop_F64toF32, mkexpr(rmode),
                                  mkexpr(f64lo)) );
         putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
         putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
         putYMMRegLane128( rD, 1, mkV128(0) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VCVTSS2SD xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5A /r */
      if (haveF3no66noF2(pfx)) {
         UChar  modrm = getUChar(delta);
         UInt   rV    = getVexNvvvv(pfx);
         UInt   rD    = gregOfRexRM(pfx, modrm);
         IRTemp f32lo = newTemp(Ity_F32);
         if (epartIsReg(modrm)) {
            UInt rS = eregOfRexRM(pfx,modrm);
            assign(f32lo, getXMMRegLane32F(rS, 0));
            delta += 1;
            DIP("vcvtss2sd %s,%s,%s\n",
                nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign(f32lo, loadLE(Ity_F32, mkexpr(addr)) );
            delta += alen;
            DIP("vcvtss2sd %s,%s,%s\n",
                dis_buf, nameXMMReg(rV), nameXMMReg(rD));
         }
         putXMMRegLane64F( rD, 0,
                           unop( Iop_F32toF64, mkexpr(f32lo)) );
         putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
         putYMMRegLane128( rD, 1, mkV128(0) );
         *uses_vvvv = True;
         goto decode_success;
      }
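      /* Note the asymmetry between the two scalar conversions above:
         narrowing with Iop_F64toF32 needs the SSE rounding mode,
         whereas widening with Iop_F32toF64 is exact and needs none. */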
      /* VCVTPS2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5B /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
                                    True/*isAvx*/, False/*!r2zero*/ );
         goto decode_success;
      }
      /* VCVTPS2DQ ymm2/m256, ymm1 = VEX.256.66.0F.WIG 5B /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
                                    False/*!r2zero*/ );
         goto decode_success;
      }
      /* VCVTTPS2DQ xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 5B /r */
      if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
                                    True/*isAvx*/, True/*r2zero*/ );
         goto decode_success;
      }
      /* VCVTTPS2DQ ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 5B /r */
      if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
                                    True/*r2zero*/ );
         goto decode_success;
      }
      /* VCVTDQ2PS xmm2/m128, xmm1 = VEX.128.0F.WIG 5B /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_CVTDQ2PS_128 ( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VCVTDQ2PS ymm2/m256, ymm1 = VEX.256.0F.WIG 5B /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_CVTDQ2PS_256 ( vbi, pfx, delta );
         goto decode_success;
      }
      /* VSUBSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5C /r */
      if (haveF2no66noF3(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo64(
                    uses_vvvv, vbi, pfx, delta, "vsubsd", Iop_Sub64F0x2 );
         goto decode_success;
      }
      /* VSUBSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5C /r */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32(
                    uses_vvvv, vbi, pfx, delta, "vsubss", Iop_Sub32F0x4 );
         goto decode_success;
      }
      /* VSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5C /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx4 );
         goto decode_success;
      }
      /* VSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5C /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx8 );
         goto decode_success;
      }
      /* VSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5C /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx2 );
         goto decode_success;
      }
      /* VSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5C /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx4 );
         goto decode_success;
      }
      /* VMINSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5D /r */
      if (haveF2no66noF3(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo64(
                    uses_vvvv, vbi, pfx, delta, "vminsd", Iop_Min64F0x2 );
         goto decode_success;
      }
      /* VMINSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5D /r */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32(
                    uses_vvvv, vbi, pfx, delta, "vminss", Iop_Min32F0x4 );
         goto decode_success;
      }
      /* VMINPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5D /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx4 );
         goto decode_success;
      }
      /* VMINPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5D /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx8 );
         goto decode_success;
      }
      /* VMINPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5D /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx2 );
         goto decode_success;
      }
      /* VMINPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5D /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx4 );
         goto decode_success;
      }
      /* VDIVSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5E /r */
      if (haveF2no66noF3(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo64(
                    uses_vvvv, vbi, pfx, delta, "vdivsd", Iop_Div64F0x2 );
         goto decode_success;
      }
      /* VDIVSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5E /r */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32(
                    uses_vvvv, vbi, pfx, delta, "vdivss", Iop_Div32F0x4 );
         goto decode_success;
      }
      /* VDIVPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5E /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx4 );
         goto decode_success;
      }
      /* VDIVPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5E /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx8 );
         goto decode_success;
      }
      /* VDIVPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5E /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx2 );
         goto decode_success;
      }
      /* VDIVPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5E /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx4 );
         goto decode_success;
      }
      /* VMAXSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5F /r */
      if (haveF2no66noF3(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo64(
                    uses_vvvv, vbi, pfx, delta, "vmaxsd", Iop_Max64F0x2 );
         goto decode_success;
      }
      /* VMAXSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5F /r */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32(
                    uses_vvvv, vbi, pfx, delta, "vmaxss", Iop_Max32F0x4 );
         goto decode_success;
      }
      /* VMAXPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5F /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx4 );
         goto decode_success;
      }
      /* VMAXPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5F /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx8 );
         goto decode_success;
      }
      /* VMAXPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5F /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx2 );
         goto decode_success;
      }
      /* VMAXPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5F /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx4 );
         goto decode_success;
      }
      /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
      /* VPUNPCKLBW = VEX.NDS.128.66.0F.WIG 60 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
                    Iop_InterleaveLO8x16, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
      /* VPUNPCKLBW = VEX.NDS.256.66.0F.WIG 60 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
                    math_VPUNPCKLBW_YMM );
         goto decode_success;
      }
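      /* For the 128-bit integer unpack/pack cases in this group, the
         underlying Iop_Interleave and Iop_QNarrowBin operations are
         invoked with swapArgs=True; presumably those IROps take their
         arguments in the opposite order to the (rV, r/m) order given
         in the comments. */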
      /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
      /* VPUNPCKLWD = VEX.NDS.128.66.0F.WIG 61 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
                    Iop_InterleaveLO16x8, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
      /* VPUNPCKLWD = VEX.NDS.256.66.0F.WIG 61 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
                    math_VPUNPCKLWD_YMM );
         goto decode_success;
      }
      /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
      /* VPUNPCKLDQ = VEX.NDS.128.66.0F.WIG 62 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpunpckldq",
                    Iop_InterleaveLO32x4, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
      /* VPUNPCKLDQ = VEX.NDS.256.66.0F.WIG 62 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpunpckldq",
                    math_VPUNPCKLDQ_YMM );
         goto decode_success;
      }
      /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
      /* VPACKSSWB = VEX.NDS.128.66.0F.WIG 63 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpacksswb",
                    Iop_QNarrowBin16Sto8Sx16, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
      /* VPACKSSWB = VEX.NDS.256.66.0F.WIG 63 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpacksswb",
                    math_VPACKSSWB_YMM );
         goto decode_success;
      }
      /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
      /* VPCMPGTB = VEX.NDS.128.66.0F.WIG 64 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx16 );
         goto decode_success;
      }
      /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
      /* VPCMPGTB = VEX.NDS.256.66.0F.WIG 64 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx32 );
         goto decode_success;
      }
      /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
      /* VPCMPGTW = VEX.NDS.128.66.0F.WIG 65 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx8 );
         goto decode_success;
      }
      /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
      /* VPCMPGTW = VEX.NDS.256.66.0F.WIG 65 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx16 );
         goto decode_success;
      }
      /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
      /* VPCMPGTD = VEX.NDS.128.66.0F.WIG 66 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx4 );
         goto decode_success;
      }
      /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
      /* VPCMPGTD = VEX.NDS.256.66.0F.WIG 66 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx8 );
         goto decode_success;
      }
      /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
      /* VPACKUSWB = VEX.NDS.128.66.0F.WIG 67 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpackuswb",
                    Iop_QNarrowBin16Sto8Ux16, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
      /* VPACKUSWB = VEX.NDS.256.66.0F.WIG 67 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpackuswb",
                    math_VPACKUSWB_YMM );
         goto decode_success;
      }
      /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
      /* VPUNPCKHBW = VEX.NDS.128.0F.WIG 68 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
                    Iop_InterleaveHI8x16, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
      /* VPUNPCKHBW = VEX.NDS.256.0F.WIG 68 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
                    math_VPUNPCKHBW_YMM );
         goto decode_success;
      }
      /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
      /* VPUNPCKHWD = VEX.NDS.128.0F.WIG 69 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
                    Iop_InterleaveHI16x8, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
      /* VPUNPCKHWD = VEX.NDS.256.0F.WIG 69 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
                    math_VPUNPCKHWD_YMM );
         goto decode_success;
      }
      /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
      /* VPUNPCKHDQ = VEX.NDS.128.66.0F.WIG 6A /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
                    Iop_InterleaveHI32x4, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
      /* VPUNPCKHDQ = VEX.NDS.256.66.0F.WIG 6A /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
                    math_VPUNPCKHDQ_YMM );
         goto decode_success;
      }
25923 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25924 /* VPACKSSDW = VEX.NDS.128.66.0F.WIG 6B /r */
25925 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25926 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25927 uses_vvvv
, vbi
, pfx
, delta
, "vpackssdw",
25928 Iop_QNarrowBin32Sto16Sx8
, NULL
,
25929 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25930 goto decode_success
;
25932 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25933 /* VPACKSSDW = VEX.NDS.256.66.0F.WIG 6B /r */
25934 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25935 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25936 uses_vvvv
, vbi
, pfx
, delta
, "vpackssdw",
25937 math_VPACKSSDW_YMM
);
25938 goto decode_success
;
25943 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
25944 /* VPUNPCKLQDQ = VEX.NDS.128.0F.WIG 6C /r */
25945 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25946 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25947 uses_vvvv
, vbi
, pfx
, delta
, "vpunpcklqdq",
25948 Iop_InterleaveLO64x2
, NULL
,
25949 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25950 goto decode_success
;
25952 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
25953 /* VPUNPCKLQDQ = VEX.NDS.256.0F.WIG 6C /r */
25954 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25955 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25956 uses_vvvv
, vbi
, pfx
, delta
, "vpunpcklqdq",
25957 math_VPUNPCKLQDQ_YMM
);
25958 goto decode_success
;
25963 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
25964 /* VPUNPCKHQDQ = VEX.NDS.128.0F.WIG 6D /r */
25965 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25966 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25967 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckhqdq",
25968 Iop_InterleaveHI64x2
, NULL
,
25969 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25970 goto decode_success
;
25972 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
25973 /* VPUNPCKHQDQ = VEX.NDS.256.0F.WIG 6D /r */
25974 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25975 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25976 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckhqdq",
25977 math_VPUNPCKHQDQ_YMM
);
25978 goto decode_success
;
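/* Note: for the 128-bit forms above, the helper writes the result to
   the low 128 bits of the destination and zeroes bits 255:128, as the
   VEX encoding requires; the 256-bit forms go through the _complex
   helper with a per-128-bit-lane math_*_YMM routine, matching the
   lane-wise AVX2 semantics of the pack/unpack instructions. */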
/* VMOVD r32/m32, xmm1 = VEX.128.66.0F.W0 6E */
if (have66noF2noF3(pfx)
    && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
   vassert(sz == 2); /* even tho we are transferring 4, not 2. */
   UChar modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      delta += 1;
      putYMMRegLoAndZU(
         gregOfRexRM(pfx,modrm),
         unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
      );
      DIP("vmovd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
                            nameXMMReg(gregOfRexRM(pfx,modrm)));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      delta += alen;
      putYMMRegLoAndZU(
         gregOfRexRM(pfx,modrm),
         unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
      );
      DIP("vmovd %s, %s\n", dis_buf,
                            nameXMMReg(gregOfRexRM(pfx,modrm)));
   }
   goto decode_success;
}
/* VMOVQ r64/m64, xmm1 = VEX.128.66.0F.W1 6E */
if (have66noF2noF3(pfx)
    && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
   vassert(sz == 2); /* even tho we are transferring 8, not 2. */
   UChar modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      delta += 1;
      putYMMRegLoAndZU(
         gregOfRexRM(pfx,modrm),
         unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
      );
      DIP("vmovq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
                            nameXMMReg(gregOfRexRM(pfx,modrm)));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      delta += alen;
      putYMMRegLoAndZU(
         gregOfRexRM(pfx,modrm),
         unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
      );
      DIP("vmovq %s, %s\n", dis_buf,
                            nameXMMReg(gregOfRexRM(pfx,modrm)));
   }
   goto decode_success;
}
/* VMOVDQA ymm2/m256, ymm1 = VEX.256.66.0F.WIG 6F */
/* VMOVDQU ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 6F */
if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
    && 1==getVexL(pfx)/*256*/) {
   UChar  modrm = getUChar(delta);
   UInt   rD    = gregOfRexRM(pfx, modrm);
   IRTemp tD    = newTemp(Ity_V256);
   Bool   isA   = have66noF2noF3(pfx);
   HChar  ch    = isA ? 'a' : 'u';
   if (epartIsReg(modrm)) {
      UInt rS = eregOfRexRM(pfx, modrm);
      delta += 1;
      assign(tD, getYMMReg(rS));
      DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      delta += alen;
      if (isA)
         gen_SIGNAL_if_not_32_aligned(vbi, addr);
      assign(tD, loadLE(Ity_V256, mkexpr(addr)));
      DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameYMMReg(rD));
   }
   putYMMReg(rD, mkexpr(tD));
   goto decode_success;
}
/* VMOVDQA xmm2/m128, xmm1 = VEX.128.66.0F.WIG 6F */
/* VMOVDQU xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 6F */
if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
    && 0==getVexL(pfx)/*128*/) {
   UChar  modrm = getUChar(delta);
   UInt   rD    = gregOfRexRM(pfx, modrm);
   IRTemp tD    = newTemp(Ity_V128);
   Bool   isA   = have66noF2noF3(pfx);
   HChar  ch    = isA ? 'a' : 'u';
   if (epartIsReg(modrm)) {
      UInt rS = eregOfRexRM(pfx, modrm);
      delta += 1;
      assign(tD, getXMMReg(rS));
      DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      delta += alen;
      if (isA)
         gen_SIGNAL_if_not_16_aligned(vbi, addr);
      assign(tD, loadLE(Ity_V128, mkexpr(addr)));
      DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameXMMReg(rD));
   }
   putYMMRegLoAndZU(rD, mkexpr(tD));
   goto decode_success;
}
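/* Note: only the VMOVDQA ('a') forms above generate an alignment
   check on the memory operand; the VMOVDQU ('u') forms may be
   unaligned.  In both cases the 128-bit form zeroes bits 255:128 of
   the destination via putYMMRegLoAndZU. */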
/* VPSHUFD imm8, xmm2/m128, xmm1 = VEX.128.66.0F.WIG 70 /r ib */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_PSHUFD_32x4( vbi, pfx, delta, True/*writesYmm*/);
   goto decode_success;
}
/* VPSHUFD imm8, ymm2/m256, ymm1 = VEX.256.66.0F.WIG 70 /r ib */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_PSHUFD_32x8( vbi, pfx, delta );
   goto decode_success;
}
/* VPSHUFLW imm8, xmm2/m128, xmm1 = VEX.128.F2.0F.WIG 70 /r ib */
if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_PSHUFxW_128( vbi, pfx, delta,
                            True/*isAvx*/, False/*!xIsH*/ );
   goto decode_success;
}
/* VPSHUFLW imm8, ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 70 /r ib */
if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_PSHUFxW_256( vbi, pfx, delta, False/*!xIsH*/ );
   goto decode_success;
}
/* VPSHUFHW imm8, xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 70 /r ib */
if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_PSHUFxW_128( vbi, pfx, delta,
                            True/*isAvx*/, True/*xIsH*/ );
   goto decode_success;
}
/* VPSHUFHW imm8, ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 70 /r ib */
if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_PSHUFxW_256( vbi, pfx, delta, True/*xIsH*/ );
   goto decode_success;
}
/* VPSRLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /2 ib */
/* VPSRAW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /4 ib */
/* VPSLLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /6 ib */
if (have66noF2noF3(pfx)
    && 0==getVexL(pfx)/*128*/
    && epartIsReg(getUChar(delta))) {
   if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
      delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
                                          "vpsrlw", Iop_ShrN16x8 );
      *uses_vvvv = True;
      goto decode_success;
   }
   if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
      delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
                                          "vpsraw", Iop_SarN16x8 );
      *uses_vvvv = True;
      goto decode_success;
   }
   if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
      delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
                                          "vpsllw", Iop_ShlN16x8 );
      *uses_vvvv = True;
      goto decode_success;
   }
   /* else fall through */
}
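/* Note: for this group and the 72/73 groups below, the sub-opcode is
   selected by the reg field (gregLO3ofRM) of the modrm byte, and the
   source must be a register, hence the epartIsReg guard; memory
   operands make the decode fall through as a failure. */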
/* VPSRLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /2 ib */
/* VPSRAW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /4 ib */
/* VPSLLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /6 ib */
if (have66noF2noF3(pfx)
    && 1==getVexL(pfx)/*256*/
    && epartIsReg(getUChar(delta))) {
   if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
      delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
                                          "vpsrlw", Iop_ShrN16x16 );
      *uses_vvvv = True;
      goto decode_success;
   }
   if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
      delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
                                          "vpsraw", Iop_SarN16x16 );
      *uses_vvvv = True;
      goto decode_success;
   }
   if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
      delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
                                          "vpsllw", Iop_ShlN16x16 );
      *uses_vvvv = True;
      goto decode_success;
   }
   /* else fall through */
}
/* VPSRLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /2 ib */
/* VPSRAD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /4 ib */
/* VPSLLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /6 ib */
if (have66noF2noF3(pfx)
    && 0==getVexL(pfx)/*128*/
    && epartIsReg(getUChar(delta))) {
   if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
      delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
                                          "vpsrld", Iop_ShrN32x4 );
      *uses_vvvv = True;
      goto decode_success;
   }
   if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
      delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
                                          "vpsrad", Iop_SarN32x4 );
      *uses_vvvv = True;
      goto decode_success;
   }
   if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
      delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
                                          "vpslld", Iop_ShlN32x4 );
      *uses_vvvv = True;
      goto decode_success;
   }
   /* else fall through */
}
/* VPSRLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /2 ib */
/* VPSRAD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /4 ib */
/* VPSLLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /6 ib */
if (have66noF2noF3(pfx)
    && 1==getVexL(pfx)/*256*/
    && epartIsReg(getUChar(delta))) {
   if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
      delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
                                          "vpsrld", Iop_ShrN32x8 );
      *uses_vvvv = True;
      goto decode_success;
   }
   if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
      delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
                                          "vpsrad", Iop_SarN32x8 );
      *uses_vvvv = True;
      goto decode_success;
   }
   if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
      delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
                                          "vpslld", Iop_ShlN32x8 );
      *uses_vvvv = True;
      goto decode_success;
   }
   /* else fall through */
}
/* VPSRLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /3 ib */
/* VPSLLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /7 ib */
/* VPSRLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /2 ib */
/* VPSLLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /6 ib */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
    && epartIsReg(getUChar(delta))) {
   Int    rS   = eregOfRexRM(pfx,getUChar(delta));
   Int    rD   = getVexNvvvv(pfx);
   IRTemp vecS = newTemp(Ity_V128);
   if (gregLO3ofRM(getUChar(delta)) == 3) {
      Int imm = (Int)getUChar(delta+1);
      DIP("vpsrldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
      delta += 2;
      assign( vecS, getXMMReg(rS) );
      putYMMRegLoAndZU(rD, mkexpr(math_PSRLDQ( vecS, imm )));
      *uses_vvvv = True;
      goto decode_success;
   }
   if (gregLO3ofRM(getUChar(delta)) == 7) {
      Int imm = (Int)getUChar(delta+1);
      DIP("vpslldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
      delta += 2;
      assign( vecS, getXMMReg(rS) );
      putYMMRegLoAndZU(rD, mkexpr(math_PSLLDQ( vecS, imm )));
      *uses_vvvv = True;
      goto decode_success;
   }
   if (gregLO3ofRM(getUChar(delta)) == 2) {
      delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
                                          "vpsrlq", Iop_ShrN64x2 );
      *uses_vvvv = True;
      goto decode_success;
   }
   if (gregLO3ofRM(getUChar(delta)) == 6) {
      delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
                                          "vpsllq", Iop_ShlN64x2 );
      *uses_vvvv = True;
      goto decode_success;
   }
   /* else fall through */
}
/* VPSRLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /3 ib */
/* VPSLLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /7 ib */
/* VPSRLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /2 ib */
/* VPSLLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /6 ib */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
    && epartIsReg(getUChar(delta))) {
   Int rS = eregOfRexRM(pfx,getUChar(delta));
   Int rD = getVexNvvvv(pfx);
   if (gregLO3ofRM(getUChar(delta)) == 3) {
      IRTemp vecS0 = newTemp(Ity_V128);
      IRTemp vecS1 = newTemp(Ity_V128);
      Int imm = (Int)getUChar(delta+1);
      DIP("vpsrldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD));
      delta += 2;
      assign( vecS0, getYMMRegLane128(rS, 0));
      assign( vecS1, getYMMRegLane128(rS, 1));
      putYMMRegLane128(rD, 0, mkexpr(math_PSRLDQ( vecS0, imm )));
      putYMMRegLane128(rD, 1, mkexpr(math_PSRLDQ( vecS1, imm )));
      *uses_vvvv = True;
      goto decode_success;
   }
   if (gregLO3ofRM(getUChar(delta)) == 7) {
      IRTemp vecS0 = newTemp(Ity_V128);
      IRTemp vecS1 = newTemp(Ity_V128);
      Int imm = (Int)getUChar(delta+1);
      DIP("vpslldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD));
      delta += 2;
      assign( vecS0, getYMMRegLane128(rS, 0));
      assign( vecS1, getYMMRegLane128(rS, 1));
      putYMMRegLane128(rD, 0, mkexpr(math_PSLLDQ( vecS0, imm )));
      putYMMRegLane128(rD, 1, mkexpr(math_PSLLDQ( vecS1, imm )));
      *uses_vvvv = True;
      goto decode_success;
   }
   if (gregLO3ofRM(getUChar(delta)) == 2) {
      delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
                                          "vpsrlq", Iop_ShrN64x4 );
      *uses_vvvv = True;
      goto decode_success;
   }
   if (gregLO3ofRM(getUChar(delta)) == 6) {
      delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
                                          "vpsllq", Iop_ShlN64x4 );
      *uses_vvvv = True;
      goto decode_success;
   }
   /* else fall through */
}
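/* Note: there is no 256-bit-wide byte shift, so the ymm forms of
   VPSRLDQ/VPSLLDQ above shift each 128-bit lane separately, which
   matches the lane-wise behaviour of the hardware instruction. */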
/* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
/* VPCMPEQB = VEX.NDS.128.66.0F.WIG 74 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
              uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x16 );
   goto decode_success;
}
/* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
/* VPCMPEQB = VEX.NDS.256.66.0F.WIG 74 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
              uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x32 );
   goto decode_success;
}
/* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
/* VPCMPEQW = VEX.NDS.128.66.0F.WIG 75 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
              uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x8 );
   goto decode_success;
}
/* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
/* VPCMPEQW = VEX.NDS.256.66.0F.WIG 75 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
              uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x16 );
   goto decode_success;
}
/* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
/* VPCMPEQD = VEX.NDS.128.66.0F.WIG 76 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
              uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x4 );
   goto decode_success;
}
/* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
/* VPCMPEQD = VEX.NDS.256.66.0F.WIG 76 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
              uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x8 );
   goto decode_success;
}
/* VZEROUPPER = VEX.128.0F.WIG 77 */
if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   Int i;
   IRTemp zero128 = newTemp(Ity_V128);
   assign(zero128, mkV128(0));
   for (i = 0; i < 16; i++) {
      putYMMRegLane128(i, 1, mkexpr(zero128));
   }
   DIP("vzeroupper\n");
   goto decode_success;
}
/* VZEROALL = VEX.256.0F.WIG 77 */
if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   Int i;
   IRTemp zero128 = newTemp(Ity_V128);
   assign(zero128, mkV128(0));
   for (i = 0; i < 16; i++) {
      putYMMRegLoAndZU(i, mkexpr(zero128));
   }
   DIP("vzeroall\n");
   goto decode_success;
}
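/* Note: VZEROUPPER clears bits 255:128 of all sixteen ymm registers
   while leaving the low 128 bits intact, whereas VZEROALL clears the
   registers completely (putYMMRegLoAndZU of a zero V128 zeroes both
   halves). */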
/* VHADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7C /r */
/* VHSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7D /r */
if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   IRTemp sV    = newTemp(Ity_V128);
   IRTemp dV    = newTemp(Ity_V128);
   Bool   isAdd = opc == 0x7C;
   const HChar* str = isAdd ? "add" : "sub";
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   UInt   rV    = getVexNvvvv(pfx);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      DIP("vh%sps %s,%s,%s\n", str, nameXMMReg(rE),
          nameXMMReg(rV), nameXMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      DIP("vh%sps %s,%s,%s\n", str, dis_buf,
          nameXMMReg(rV), nameXMMReg(rG));
      delta += alen;
   }
   assign( dV, getXMMReg(rV) );
   putYMMRegLoAndZU( rG, mkexpr( math_HADDPS_128 ( dV, sV, isAdd ) ) );
   *uses_vvvv = True;
   goto decode_success;
}
/* VHADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7C /r */
/* VHSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7D /r */
if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   IRTemp sV    = newTemp(Ity_V256);
   IRTemp dV    = newTemp(Ity_V256);
   IRTemp s1, s0, d1, d0;
   Bool   isAdd = opc == 0x7C;
   const HChar* str = isAdd ? "add" : "sub";
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   UInt   rV    = getVexNvvvv(pfx);
   s1 = s0 = d1 = d0 = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getYMMReg(rE) );
      DIP("vh%sps %s,%s,%s\n", str, nameYMMReg(rE),
          nameYMMReg(rV), nameYMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
      DIP("vh%sps %s,%s,%s\n", str, dis_buf,
          nameYMMReg(rV), nameYMMReg(rG));
      delta += alen;
   }
   assign( dV, getYMMReg(rV) );
   breakupV256toV128s( dV, &d1, &d0 );
   breakupV256toV128s( sV, &s1, &s0 );
   putYMMReg( rG, binop(Iop_V128HLtoV256,
                        mkexpr( math_HADDPS_128 ( d1, s1, isAdd ) ),
                        mkexpr( math_HADDPS_128 ( d0, s0, isAdd ) ) ) );
   *uses_vvvv = True;
   goto decode_success;
}
/* VHADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7C /r */
/* VHSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7D /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   IRTemp sV    = newTemp(Ity_V128);
   IRTemp dV    = newTemp(Ity_V128);
   Bool   isAdd = opc == 0x7C;
   const HChar* str = isAdd ? "add" : "sub";
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   UInt   rV    = getVexNvvvv(pfx);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE),
          nameXMMReg(rV), nameXMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      DIP("vh%spd %s,%s,%s\n", str, dis_buf,
          nameXMMReg(rV), nameXMMReg(rG));
      delta += alen;
   }
   assign( dV, getXMMReg(rV) );
   putYMMRegLoAndZU( rG, mkexpr( math_HADDPD_128 ( dV, sV, isAdd ) ) );
   *uses_vvvv = True;
   goto decode_success;
}
/* VHADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7C /r */
/* VHSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7D /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   IRTemp sV    = newTemp(Ity_V256);
   IRTemp dV    = newTemp(Ity_V256);
   IRTemp s1, s0, d1, d0;
   Bool   isAdd = opc == 0x7C;
   const HChar* str = isAdd ? "add" : "sub";
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   UInt   rV    = getVexNvvvv(pfx);
   s1 = s0 = d1 = d0 = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getYMMReg(rE) );
      DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE),
          nameYMMReg(rV), nameYMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
      DIP("vh%spd %s,%s,%s\n", str, dis_buf,
          nameYMMReg(rV), nameYMMReg(rG));
      delta += alen;
   }
   assign( dV, getYMMReg(rV) );
   breakupV256toV128s( dV, &d1, &d0 );
   breakupV256toV128s( sV, &s1, &s0 );
   putYMMReg( rG, binop(Iop_V128HLtoV256,
                        mkexpr( math_HADDPD_128 ( d1, s1, isAdd ) ),
                        mkexpr( math_HADDPD_128 ( d0, s0, isAdd ) ) ) );
   *uses_vvvv = True;
   goto decode_success;
}
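/* Note: there is no 256-bit horizontal-add helper; the ymm forms
   split both operands into 128-bit halves with breakupV256toV128s,
   apply the 128-bit math helper to each half, and reassemble the
   result with Iop_V128HLtoV256. */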
/* Note the Intel docs don't make sense for this.  I think they
   are wrong.  They seem to imply it is a store when in fact I
   think it is a load.  Also it's unclear whether this is W0, W1
   or WIG. */
/* VMOVQ xmm2/m64, xmm1 = VEX.128.F3.0F.W0 7E /r */
if (haveF3no66noF2(pfx)
    && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
   vassert(sz == 4); /* even tho we are transferring 8, not 4. */
   UChar modrm = getUChar(delta);
   UInt  rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      putXMMRegLane64( rG, 0, getXMMRegLane64( rE, 0 ));
      DIP("vmovq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
      DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
      delta += alen;
   }
   /* zero bits 255:64 */
   putXMMRegLane64( rG, 1, mkU64(0) );
   putYMMRegLane128( rG, 1, mkV128(0) );
   goto decode_success;
}
/* VMOVQ xmm1, r64 = VEX.128.66.0F.W1 7E /r (reg case only) */
/* Moves from G to E, so is a store-form insn */
/* Intel docs list this in the VMOVD entry for some reason. */
if (have66noF2noF3(pfx)
    && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
   UChar modrm = getUChar(delta);
   UInt  rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      DIP("vmovq %s,%s\n", nameXMMReg(rG), nameIReg64(rE));
      putIReg64(rE, getXMMRegLane64(rG, 0));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      storeLE( mkexpr(addr), getXMMRegLane64(rG, 0) );
      DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
      delta += alen;
   }
   goto decode_success;
}
/* VMOVD xmm1, m32/r32 = VEX.128.66.0F.W0 7E /r (reg case only) */
/* Moves from G to E, so is a store-form insn */
if (have66noF2noF3(pfx)
    && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
   UChar modrm = getUChar(delta);
   UInt  rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      DIP("vmovd %s,%s\n", nameXMMReg(rG), nameIReg32(rE));
      putIReg32(rE, getXMMRegLane32(rG, 0));
      delta += 1;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      storeLE( mkexpr(addr), getXMMRegLane32(rG, 0) );
      DIP("vmovd %s,%s\n", dis_buf, nameXMMReg(rG));
      delta += alen;
   }
   goto decode_success;
}
/* VMOVDQA ymm1, ymm2/m256 = VEX.256.66.0F.WIG 7F */
/* VMOVDQU ymm1, ymm2/m256 = VEX.256.F3.0F.WIG 7F */
if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
    && 1==getVexL(pfx)/*256*/) {
   UChar  modrm = getUChar(delta);
   UInt   rS    = gregOfRexRM(pfx, modrm);
   IRTemp tS    = newTemp(Ity_V256);
   Bool   isA   = have66noF2noF3(pfx);
   HChar  ch    = isA ? 'a' : 'u';
   assign(tS, getYMMReg(rS));
   if (epartIsReg(modrm)) {
      UInt rD = eregOfRexRM(pfx, modrm);
      delta += 1;
      putYMMReg(rD, mkexpr(tS));
      DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      delta += alen;
      if (isA)
         gen_SIGNAL_if_not_32_aligned(vbi, addr);
      storeLE(mkexpr(addr), mkexpr(tS));
      DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), dis_buf);
   }
   goto decode_success;
}
/* VMOVDQA xmm1, xmm2/m128 = VEX.128.66.0F.WIG 7F */
/* VMOVDQU xmm1, xmm2/m128 = VEX.128.F3.0F.WIG 7F */
if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
    && 0==getVexL(pfx)/*128*/) {
   UChar  modrm = getUChar(delta);
   UInt   rS    = gregOfRexRM(pfx, modrm);
   IRTemp tS    = newTemp(Ity_V128);
   Bool   isA   = have66noF2noF3(pfx);
   HChar  ch    = isA ? 'a' : 'u';
   assign(tS, getXMMReg(rS));
   if (epartIsReg(modrm)) {
      UInt rD = eregOfRexRM(pfx, modrm);
      delta += 1;
      putYMMRegLoAndZU(rD, mkexpr(tS));
      DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      delta += alen;
      if (isA)
         gen_SIGNAL_if_not_16_aligned(vbi, addr);
      storeLE(mkexpr(addr), mkexpr(tS));
      DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), dis_buf);
   }
   goto decode_success;
}
/* VSTMXCSR m32 = VEX.LZ.0F.WIG AE /3 */
if (haveNo66noF2noF3(pfx)
    && 0==getVexL(pfx)/*LZ*/
    && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
    && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3) {
   delta = dis_STMXCSR(vbi, pfx, delta, True/*isAvx*/);
   goto decode_success;
}
/* VLDMXCSR m32 = VEX.LZ.0F.WIG AE /2 */
if (haveNo66noF2noF3(pfx)
    && 0==getVexL(pfx)/*LZ*/
    && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
    && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2) {
   delta = dis_LDMXCSR(vbi, pfx, delta, True/*isAvx*/);
   goto decode_success;
}
/* VCMPSD xmm3/m64(E=argL), xmm2(V=argR), xmm1(G) */
/* = VEX.NDS.LIG.F2.0F.WIG C2 /r ib */
if (haveF2no66noF3(pfx)) {
   Long delta0 = delta;
   delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
                                    "vcmpsd", False/*!all_lanes*/,
                                    8/*sz*/);
   if (delta > delta0) goto decode_success;
   /* else fall through -- decoding has failed */
}
/* VCMPSS xmm3/m32(E=argL), xmm2(V=argR), xmm1(G) */
/* = VEX.NDS.LIG.F3.0F.WIG C2 /r ib */
if (haveF3no66noF2(pfx)) {
   Long delta0 = delta;
   delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
                                    "vcmpss", False/*!all_lanes*/,
                                    4/*sz*/);
   if (delta > delta0) goto decode_success;
   /* else fall through -- decoding has failed */
}
/* VCMPPD xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
/* = VEX.NDS.128.66.0F.WIG C2 /r ib */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   Long delta0 = delta;
   delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
                                    "vcmppd", True/*all_lanes*/,
                                    8/*sz*/);
   if (delta > delta0) goto decode_success;
   /* else fall through -- decoding has failed */
}
/* VCMPPD ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
/* = VEX.NDS.256.66.0F.WIG C2 /r ib */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   Long delta0 = delta;
   delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
                                    "vcmppd", 8/*sz*/);
   if (delta > delta0) goto decode_success;
   /* else fall through -- decoding has failed */
}
/* VCMPPS xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
/* = VEX.NDS.128.0F.WIG C2 /r ib */
if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   Long delta0 = delta;
   delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
                                    "vcmpps", True/*all_lanes*/,
                                    4/*sz*/);
   if (delta > delta0) goto decode_success;
   /* else fall through -- decoding has failed */
}
/* VCMPPS ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
/* = VEX.NDS.256.0F.WIG C2 /r ib */
if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   Long delta0 = delta;
   delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
                                    "vcmpps", 4/*sz*/);
   if (delta > delta0) goto decode_success;
   /* else fall through -- decoding has failed */
}
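/* Note: the dis_AVX*_cmp_V_E_to_G helpers leave delta unchanged when
   they meet an imm8 comparison predicate they do not handle, so
   "delta > delta0" is the success test and an unchanged delta lets
   decoding fall through to the failure path. */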
/* VPINSRW r32/m16, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG C4 /r ib */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx, modrm);
   UInt   rV    = getVexNvvvv(pfx);
   Int    imm8;
   IRTemp new16 = newTemp(Ity_I16);

   if ( epartIsReg( modrm ) ) {
      imm8 = (Int)(getUChar(delta+1) & 7);
      assign( new16, unop(Iop_32to16,
                          getIReg32(eregOfRexRM(pfx,modrm))) );
      delta += 1+1;
      DIP( "vpinsrw $%d,%s,%s\n", imm8,
           nameIReg32( eregOfRexRM(pfx, modrm) ), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8 = (Int)(getUChar(delta+alen) & 7);
      assign( new16, loadLE( Ity_I16, mkexpr(addr) ));
      delta += alen+1;
      DIP( "vpinsrw $%d,%s,%s\n",
           imm8, dis_buf, nameXMMReg(rG) );
   }

   IRTemp src_vec = newTemp(Ity_V128);
   assign(src_vec, getXMMReg( rV ));
   IRTemp res_vec = math_PINSRW_128( src_vec, new16, imm8 );
   putYMMRegLoAndZU( rG, mkexpr(res_vec) );
   *uses_vvvv = True;
   goto decode_success;
}
/* VPEXTRW imm8, xmm1, reg32 = VEX.128.66.0F.W0 C5 /r ib */
if (have66noF2noF3(pfx)
    && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
   Long delta0 = delta;
   delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
                                        True/*isAvx*/ );
   if (delta > delta0) goto decode_success;
   /* else fall through -- decoding has failed */
}
/* VSHUFPS imm8, xmm3/m128, xmm2, xmm1 */
/* = VEX.NDS.128.0F.WIG C6 /r ib */
if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   Int    imm8 = 0;
   IRTemp eV   = newTemp(Ity_V128);
   IRTemp vV   = newTemp(Ity_V128);
   UInt  modrm = getUChar(delta);
   UInt  rG    = gregOfRexRM(pfx,modrm);
   UInt  rV    = getVexNvvvv(pfx);
   assign( vV, getXMMReg(rV) );
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( eV, getXMMReg(rE) );
      imm8 = (Int)getUChar(delta+1);
      delta += 1+1;
      DIP("vshufps $%d,%s,%s,%s\n",
          imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
      imm8 = (Int)getUChar(delta+alen);
      delta += 1+alen;
      DIP("vshufps $%d,%s,%s,%s\n",
          imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
   }
   IRTemp res = math_SHUFPS_128( eV, vV, imm8 );
   putYMMRegLoAndZU( rG, mkexpr(res) );
   *uses_vvvv = True;
   goto decode_success;
}
/* VSHUFPS imm8, ymm3/m256, ymm2, ymm1 */
/* = VEX.NDS.256.0F.WIG C6 /r ib */
if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   Int    imm8 = 0;
   IRTemp eV   = newTemp(Ity_V256);
   IRTemp vV   = newTemp(Ity_V256);
   UInt  modrm = getUChar(delta);
   UInt  rG    = gregOfRexRM(pfx,modrm);
   UInt  rV    = getVexNvvvv(pfx);
   assign( vV, getYMMReg(rV) );
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( eV, getYMMReg(rE) );
      imm8 = (Int)getUChar(delta+1);
      delta += 1+1;
      DIP("vshufps $%d,%s,%s,%s\n",
          imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
      imm8 = (Int)getUChar(delta+alen);
      delta += 1+alen;
      DIP("vshufps $%d,%s,%s,%s\n",
          imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
   }
   IRTemp res = math_SHUFPS_256( eV, vV, imm8 );
   putYMMReg( rG, mkexpr(res) );
   *uses_vvvv = True;
   goto decode_success;
}
/* VSHUFPD imm8, xmm3/m128, xmm2, xmm1 */
/* = VEX.NDS.128.66.0F.WIG C6 /r ib */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   Int    imm8 = 0;
   IRTemp eV   = newTemp(Ity_V128);
   IRTemp vV   = newTemp(Ity_V128);
   UInt  modrm = getUChar(delta);
   UInt  rG    = gregOfRexRM(pfx,modrm);
   UInt  rV    = getVexNvvvv(pfx);
   assign( vV, getXMMReg(rV) );
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( eV, getXMMReg(rE) );
      imm8 = (Int)getUChar(delta+1);
      delta += 1+1;
      DIP("vshufpd $%d,%s,%s,%s\n",
          imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
      imm8 = (Int)getUChar(delta+alen);
      delta += 1+alen;
      DIP("vshufpd $%d,%s,%s,%s\n",
          imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
   }
   IRTemp res = math_SHUFPD_128( eV, vV, imm8 );
   putYMMRegLoAndZU( rG, mkexpr(res) );
   *uses_vvvv = True;
   goto decode_success;
}
/* VSHUFPD imm8, ymm3/m256, ymm2, ymm1 */
/* = VEX.NDS.256.66.0F.WIG C6 /r ib */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   Int    imm8 = 0;
   IRTemp eV   = newTemp(Ity_V256);
   IRTemp vV   = newTemp(Ity_V256);
   UInt  modrm = getUChar(delta);
   UInt  rG    = gregOfRexRM(pfx,modrm);
   UInt  rV    = getVexNvvvv(pfx);
   assign( vV, getYMMReg(rV) );
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( eV, getYMMReg(rE) );
      imm8 = (Int)getUChar(delta+1);
      delta += 1+1;
      DIP("vshufpd $%d,%s,%s,%s\n",
          imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
      imm8 = (Int)getUChar(delta+alen);
      delta += 1+alen;
      DIP("vshufpd $%d,%s,%s,%s\n",
          imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
   }
   IRTemp res = math_SHUFPD_256( eV, vV, imm8 );
   putYMMReg( rG, mkexpr(res) );
   *uses_vvvv = True;
   goto decode_success;
}
/* VADDSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D0 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
              uses_vvvv, vbi, pfx, delta,
              "vaddsubpd", math_ADDSUBPD_128 );
   goto decode_success;
}
/* VADDSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D0 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
              uses_vvvv, vbi, pfx, delta,
              "vaddsubpd", math_ADDSUBPD_256 );
   goto decode_success;
}
/* VADDSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG D0 /r */
if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
              uses_vvvv, vbi, pfx, delta,
              "vaddsubps", math_ADDSUBPS_128 );
   goto decode_success;
}
/* VADDSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG D0 /r */
if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
              uses_vvvv, vbi, pfx, delta,
              "vaddsubps", math_ADDSUBPS_256 );
   goto decode_success;
}
/* VPSRLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D1 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
                                  "vpsrlw", Iop_ShrN16x8 );
   *uses_vvvv = True;
   goto decode_success;
}
/* VPSRLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D1 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
                                  "vpsrlw", Iop_ShrN16x16 );
   *uses_vvvv = True;
   goto decode_success;
}
/* VPSRLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D2 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
                                  "vpsrld", Iop_ShrN32x4 );
   *uses_vvvv = True;
   goto decode_success;
}
/* VPSRLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D2 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
                                  "vpsrld", Iop_ShrN32x8 );
   *uses_vvvv = True;
   goto decode_success;
}
/* VPSRLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D3 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
                                  "vpsrlq", Iop_ShrN64x2 );
   *uses_vvvv = True;
   goto decode_success;
}
/* VPSRLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D3 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
                                  "vpsrlq", Iop_ShrN64x4 );
   *uses_vvvv = True;
   goto decode_success;
}
/* VPADDQ r/m, rV, r ::: r = rV + r/m */
/* VPADDQ = VEX.NDS.128.66.0F.WIG D4 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
              uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x2 );
   goto decode_success;
}
/* VPADDQ r/m, rV, r ::: r = rV + r/m */
/* VPADDQ = VEX.NDS.256.66.0F.WIG D4 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
              uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x4 );
   goto decode_success;
}
/* VPMULLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D5 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x8 );
   goto decode_success;
}
/* VPMULLW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D5 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x16 );
   goto decode_success;
}
/* Basically: 66 0F D6 = MOVQ -- move 64 bits from G (lo half
   xmm) to E (mem or lo half xmm).  Looks like L==0(128), W==0
   (WIG, maybe?). */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
    && 0==getRexW(pfx)/*this might be redundant, dunno*/) {
   UChar modrm = getUChar(delta);
   UInt  rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      /* dst: lo half copied, hi half zeroed */
      UInt rE = eregOfRexRM(pfx,modrm);
      putXMMRegLane64( rE, 0, getXMMRegLane64( rG, 0 ));
      /* zero bits 255:64 */
      putXMMRegLane64( rE, 1, mkU64(0) );
      putYMMRegLane128( rE, 1, mkV128(0) );
      DIP("vmovq %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
      delta += 1;
      goto decode_success;
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      storeLE( mkexpr(addr), getXMMRegLane64( rG, 0 ));
      DIP("vmovq %s,%s\n", nameXMMReg(rG), dis_buf);
      delta += alen;
      goto decode_success;
   }
}
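/* Note: in the reg-reg case above, the D6 form writes the low 64 bits
   of the destination xmm register and explicitly zeroes everything
   above them (bits 255:64), as required for the VEX-encoded register
   form. */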
/* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB xmm1, r32 */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_PMOVMSKB_128( vbi, pfx, delta, True/*isAvx*/ );
   goto decode_success;
}
/* VEX.256.66.0F.WIG D7 /r = VPMOVMSKB ymm1, r32 */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_PMOVMSKB_256( vbi, pfx, delta );
   goto decode_success;
}
/* VPSUBUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D8 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux16 );
   goto decode_success;
}
/* VPSUBUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D8 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux32 );
   goto decode_success;
}
/* VPSUBUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D9 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux8 );
   goto decode_success;
}
/* VPSUBUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D9 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux16 );
   goto decode_success;
}
/* VPMINUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DA /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux16 );
   goto decode_success;
}
/* VPMINUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DA /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux32 );
   goto decode_success;
}
/* VPAND r/m, rV, r ::: r = rV & r/m */
/* VEX.NDS.128.66.0F.WIG DB /r = VPAND xmm3/m128, xmm2, xmm1 */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
              uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV128 );
   goto decode_success;
}
/* VPAND r/m, rV, r ::: r = rV & r/m */
/* VEX.NDS.256.66.0F.WIG DB /r = VPAND ymm3/m256, ymm2, ymm1 */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
              uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV256 );
   goto decode_success;
}
/* VPADDUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DC /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux16 );
   goto decode_success;
}
/* VPADDUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DC /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux32 );
   goto decode_success;
}
/* VPADDUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DD /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux8 );
   goto decode_success;
}
/* VPADDUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DD /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux16 );
   goto decode_success;
}
/* VPMAXUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DE /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux16 );
   goto decode_success;
}
/* VPMAXUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DE /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux32 );
   goto decode_success;
}
/* VPANDN r/m, rV, r ::: r = rV & ~r/m (is that correct, re the ~ ?) */
/* VEX.NDS.128.66.0F.WIG DF /r = VPANDN xmm3/m128, xmm2, xmm1 */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
              uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV128,
              NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
   goto decode_success;
}
/* VPANDN r/m, rV, r ::: r = rV & ~r/m (is that correct, re the ~ ?) */
/* VEX.NDS.256.66.0F.WIG DF /r = VPANDN ymm3/m256, ymm2, ymm1 */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
              uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV256,
              NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
   goto decode_success;
}
/* VPAVGB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E0 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux16 );
   goto decode_success;
}
/* VPAVGB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E0 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux32 );
   goto decode_success;
}
/* VPSRAW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E1 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
                                  "vpsraw", Iop_SarN16x8 );
   *uses_vvvv = True;
   goto decode_success;
}
/* VPSRAW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E1 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
                                  "vpsraw", Iop_SarN16x16 );
   *uses_vvvv = True;
   goto decode_success;
}
/* VPSRAD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E2 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
                                  "vpsrad", Iop_SarN32x4 );
   *uses_vvvv = True;
   goto decode_success;
}
/* VPSRAD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E2 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
                                  "vpsrad", Iop_SarN32x8 );
   *uses_vvvv = True;
   goto decode_success;
}
/* VPAVGW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E3 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux8 );
   goto decode_success;
}
/* VPAVGW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E3 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux16 );
   goto decode_success;
}
/* VPMULHUW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E4 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux8 );
   goto decode_success;
}
/* VPMULHUW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E4 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux16 );
   goto decode_success;
}
/* VPMULHW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E5 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx8 );
   goto decode_success;
}
/* VPMULHW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E5 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx16 );
   goto decode_success;
}
/* VCVTDQ2PD xmm2/m64, xmm1 = VEX.128.F3.0F.WIG E6 /r */
if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_CVTDQ2PD_128(vbi, pfx, delta, True/*isAvx*/);
   goto decode_success;
}
/* VCVTDQ2PD xmm2/m128, ymm1 = VEX.256.F3.0F.WIG E6 /r */
if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_CVTDQ2PD_256(vbi, pfx, delta);
   goto decode_success;
}
/* VCVTTPD2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG E6 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
                             True/*r2zero*/);
   goto decode_success;
}
/* VCVTTPD2DQ ymm2/m256, xmm1 = VEX.256.66.0F.WIG E6 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, True/*r2zero*/);
   goto decode_success;
}
/* VCVTPD2DQ xmm2/m128, xmm1 = VEX.128.F2.0F.WIG E6 /r */
if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
                             False/*!r2zero*/);
   goto decode_success;
}
/* VCVTPD2DQ ymm2/m256, xmm1 = VEX.256.F2.0F.WIG E6 /r */
if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, False/*!r2zero*/);
   goto decode_success;
}
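/* Note: opcode E6 selects three different conversions by prefix:
   F3 = VCVTDQ2PD (widening int->double), 66 = VCVTTPD2DQ (truncating
   double->int, hence r2zero) and F2 = VCVTPD2DQ (double->int using
   the current rounding mode). */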
/* VMOVNTDQ xmm1, m128 = VEX.128.66.0F.WIG E7 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   UChar modrm = getUChar(delta);
   UInt  rG    = gregOfRexRM(pfx,modrm);
   if (!epartIsReg(modrm)) {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      gen_SIGNAL_if_not_16_aligned( vbi, addr );
      storeLE( mkexpr(addr), getXMMReg(rG) );
      DIP("vmovntdq %s,%s\n", dis_buf, nameXMMReg(rG));
      delta += alen;
      goto decode_success;
   }
   /* else fall through */
}
/* VMOVNTDQ ymm1, m256 = VEX.256.66.0F.WIG E7 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   UChar modrm = getUChar(delta);
   UInt  rG    = gregOfRexRM(pfx,modrm);
   if (!epartIsReg(modrm)) {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      gen_SIGNAL_if_not_32_aligned( vbi, addr );
      storeLE( mkexpr(addr), getYMMReg(rG) );
      DIP("vmovntdq %s,%s\n", dis_buf, nameYMMReg(rG));
      delta += alen;
      goto decode_success;
   }
   /* else fall through */
}
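/* Note: the non-temporal hint of VMOVNTDQ is not modelled -- an
   ordinary store is generated -- but the 16/32-byte alignment
   requirement is still enforced.  Register destinations are invalid,
   so the reg case falls through to a decode failure. */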
/* VPSUBSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E8 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx16 );
   goto decode_success;
}
/* VPSUBSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E8 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx32 );
   goto decode_success;
}
/* VPSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E9 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx8 );
   goto decode_success;
}
/* VPSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E9 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx16 );
   goto decode_success;
}
/* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
/* VPMINSW = VEX.NDS.128.66.0F.WIG EA /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
              uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx8 );
   goto decode_success;
}
/* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
/* VPMINSW = VEX.NDS.256.66.0F.WIG EA /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
              uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx16 );
   goto decode_success;
}
/* VPOR r/m, rV, r ::: r = rV | r/m */
/* VPOR = VEX.NDS.128.66.0F.WIG EB /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
              uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV128 );
   goto decode_success;
}
/* VPOR r/m, rV, r ::: r = rV | r/m */
/* VPOR = VEX.NDS.256.66.0F.WIG EB /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
              uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV256 );
   goto decode_success;
}
/* VPADDSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG EC /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx16 );
   goto decode_success;
}
/* VPADDSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG EC /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx32 );
   goto decode_success;
}
/* VPADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG ED /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx8 );
   goto decode_success;
}
/* VPADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG ED /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_E_V_to_G(
              uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx16 );
   goto decode_success;
}
/* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
/* VPMAXSW = VEX.NDS.128.66.0F.WIG EE /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
              uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx8 );
   goto decode_success;
}
/* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
/* VPMAXSW = VEX.NDS.256.66.0F.WIG EE /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
              uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx16 );
   goto decode_success;
}
/* VPXOR r/m, rV, r ::: r = rV ^ r/m */
/* VPXOR = VEX.NDS.128.66.0F.WIG EF /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
              uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV128 );
   goto decode_success;
}
/* VPXOR r/m, rV, r ::: r = rV ^ r/m */
/* VPXOR = VEX.NDS.256.66.0F.WIG EF /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
              uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV256 );
   goto decode_success;
}
/* VLDDQU m256, ymm1 = VEX.256.F2.0F.WIG F0 /r */
if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   UChar  modrm = getUChar(delta);
   UInt   rD    = gregOfRexRM(pfx, modrm);
   IRTemp tD    = newTemp(Ity_V256);
   if (epartIsReg(modrm)) break;
   addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;
   assign(tD, loadLE(Ity_V256, mkexpr(addr)));
   DIP("vlddqu %s,%s\n", dis_buf, nameYMMReg(rD));
   putYMMReg(rD, mkexpr(tD));
   goto decode_success;
}
/* VLDDQU m128, xmm1 = VEX.128.F2.0F.WIG F0 /r */
if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   UChar  modrm = getUChar(delta);
   UInt   rD    = gregOfRexRM(pfx, modrm);
   IRTemp tD    = newTemp(Ity_V128);
   if (epartIsReg(modrm)) break;
   addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;
   assign(tD, loadLE(Ity_V128, mkexpr(addr)));
   DIP("vlddqu %s,%s\n", dis_buf, nameXMMReg(rD));
   putYMMRegLoAndZU(rD, mkexpr(tD));
   goto decode_success;
}
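/* Note: VLDDQU is memory-only (a register source breaks out of the
   decode), and unlike VMOVDQA no alignment check is generated --
   LDDQU exists precisely to permit unaligned loads. */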
/* VPSLLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F1 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
                                  "vpsllw", Iop_ShlN16x8 );
   *uses_vvvv = True;
   goto decode_success;
}
/* VPSLLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F1 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
                                  "vpsllw", Iop_ShlN16x16 );
   *uses_vvvv = True;
   goto decode_success;
}
/* VPSLLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F2 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
                                  "vpslld", Iop_ShlN32x4 );
   *uses_vvvv = True;
   goto decode_success;
}
/* VPSLLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F2 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
                                  "vpslld", Iop_ShlN32x8 );
   *uses_vvvv = True;
   goto decode_success;
}
/* VPSLLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F3 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
                                  "vpsllq", Iop_ShlN64x2 );
   *uses_vvvv = True;
   goto decode_success;
}
/* VPSLLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F3 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
                                  "vpsllq", Iop_ShlN64x4 );
   *uses_vvvv = True;
   goto decode_success;
}
/* VPMULUDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F4 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
              uses_vvvv, vbi, pfx, delta,
              "vpmuludq", math_PMULUDQ_128 );
   goto decode_success;
}
/* VPMULUDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F4 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
              uses_vvvv, vbi, pfx, delta,
              "vpmuludq", math_PMULUDQ_256 );
   goto decode_success;
}
/* VPMADDWD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F5 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
              uses_vvvv, vbi, pfx, delta,
              "vpmaddwd", math_PMADDWD_128 );
   goto decode_success;
}
/* VPMADDWD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F5 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
              uses_vvvv, vbi, pfx, delta,
              "vpmaddwd", math_PMADDWD_256 );
   goto decode_success;
}
/* VPSADBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F6 /r */
if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
   delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
              uses_vvvv, vbi, pfx, delta,
              "vpsadbw", math_PSADBW_128 );
   goto decode_success;
}
/* VPSADBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F6 /r */
if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
   delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
              uses_vvvv, vbi, pfx, delta,
              "vpsadbw", math_PSADBW_256 );
   goto decode_success;
}
27606 /* VMASKMOVDQU xmm2, xmm1 = VEX.128.66.0F.WIG F7 /r */
27607 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
27608 && epartIsReg(getUChar(delta
))) {
27609 delta
= dis_MASKMOVDQU( vbi
, pfx
, delta
, True
/*isAvx*/ );
27610 goto decode_success
;
27615 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
27616 /* VPSUBB = VEX.NDS.128.66.0F.WIG F8 /r */
27617 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27618 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27619 uses_vvvv
, vbi
, pfx
, delta
, "vpsubb", Iop_Sub8x16
);
27620 goto decode_success
;
27622 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
27623 /* VPSUBB = VEX.NDS.256.66.0F.WIG F8 /r */
27624 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27625 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27626 uses_vvvv
, vbi
, pfx
, delta
, "vpsubb", Iop_Sub8x32
);
27627 goto decode_success
;
27632 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
27633 /* VPSUBW = VEX.NDS.128.66.0F.WIG F9 /r */
27634 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27635 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27636 uses_vvvv
, vbi
, pfx
, delta
, "vpsubw", Iop_Sub16x8
);
27637 goto decode_success
;
27639 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
27640 /* VPSUBW = VEX.NDS.256.66.0F.WIG F9 /r */
27641 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27642 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27643 uses_vvvv
, vbi
, pfx
, delta
, "vpsubw", Iop_Sub16x16
);
27644 goto decode_success
;
27649 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
27650 /* VPSUBD = VEX.NDS.128.66.0F.WIG FA /r */
27651 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27652 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27653 uses_vvvv
, vbi
, pfx
, delta
, "vpsubd", Iop_Sub32x4
);
27654 goto decode_success
;
27656 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
27657 /* VPSUBD = VEX.NDS.256.66.0F.WIG FA /r */
27658 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27659 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27660 uses_vvvv
, vbi
, pfx
, delta
, "vpsubd", Iop_Sub32x8
);
27661 goto decode_success
;
27666 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
27667 /* VPSUBQ = VEX.NDS.128.66.0F.WIG FB /r */
27668 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27669 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27670 uses_vvvv
, vbi
, pfx
, delta
, "vpsubq", Iop_Sub64x2
);
27671 goto decode_success
;
27673 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
27674 /* VPSUBQ = VEX.NDS.256.66.0F.WIG FB /r */
27675 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27676 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27677 uses_vvvv
, vbi
, pfx
, delta
, "vpsubq", Iop_Sub64x4
);
27678 goto decode_success
;
27683 /* VPADDB r/m, rV, r ::: r = rV + r/m */
27684 /* VPADDB = VEX.NDS.128.66.0F.WIG FC /r */
27685 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27686 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27687 uses_vvvv
, vbi
, pfx
, delta
, "vpaddb", Iop_Add8x16
);
27688 goto decode_success
;
27690 /* VPADDB r/m, rV, r ::: r = rV + r/m */
27691 /* VPADDB = VEX.NDS.256.66.0F.WIG FC /r */
27692 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27693 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27694 uses_vvvv
, vbi
, pfx
, delta
, "vpaddb", Iop_Add8x32
);
27695 goto decode_success
;
27700 /* VPADDW r/m, rV, r ::: r = rV + r/m */
27701 /* VPADDW = VEX.NDS.128.66.0F.WIG FD /r */
27702 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27703 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27704 uses_vvvv
, vbi
, pfx
, delta
, "vpaddw", Iop_Add16x8
);
27705 goto decode_success
;
27707 /* VPADDW r/m, rV, r ::: r = rV + r/m */
27708 /* VPADDW = VEX.NDS.256.66.0F.WIG FD /r */
27709 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27710 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27711 uses_vvvv
, vbi
, pfx
, delta
, "vpaddw", Iop_Add16x16
);
27712 goto decode_success
;
27717 /* VPADDD r/m, rV, r ::: r = rV + r/m */
27718 /* VPADDD = VEX.NDS.128.66.0F.WIG FE /r */
27719 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27720 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27721 uses_vvvv
, vbi
, pfx
, delta
, "vpaddd", Iop_Add32x4
);
27722 goto decode_success
;
27724 /* VPADDD r/m, rV, r ::: r = rV + r/m */
27725 /* VPADDD = VEX.NDS.256.66.0F.WIG FE /r */
27726 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27727 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27728 uses_vvvv
, vbi
, pfx
, delta
, "vpaddd", Iop_Add32x8
);
27729 goto decode_success
;
/*------------------------------------------------------------*/
/*--- Top-level post-escape decoders: dis_ESC_0F38__VEX    ---*/
/*------------------------------------------------------------*/
static IRTemp math_PERMILPS_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
{
   /* In the control vector, zero out all but the bottom two bits of
      each 32-bit lane. */
   IRExpr* cv1 = binop(Iop_ShrN32x4,
                       binop(Iop_ShlN32x4, mkexpr(ctrlV), mkU8(30)),
                       mkU8(30));
   /* And use the resulting cleaned-up control vector as steering
      in a Perm operation. */
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_Perm32x4, mkexpr(dataV), cv1));
   return res;
}
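
/* Illustrative sketch only (not used by the decoder): a scalar
   reference model of the variable-control VPERMILPS steering that
   math_PERMILPS_VAR_128 expresses with Iop_Perm32x4.  The helper name
   below is hypothetical. */
#if 0
static void ref_PERMILPS_VAR_128 ( UInt res[4],
                                   const UInt data[4], const UInt ctrl[4] )
{
   /* Only the bottom two bits of each control lane select a source
      lane; the Shl/Shr pair above clears the rest. */
   for (Int i = 0; i < 4; i++)
      res[i] = data[ctrl[i] & 3];
}
#endif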
static IRTemp math_PERMILPS_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
{
   IRTemp dHi, dLo, cHi, cLo;
   dHi = dLo = cHi = cLo = IRTemp_INVALID;
   breakupV256toV128s( dataV, &dHi, &dLo );
   breakupV256toV128s( ctrlV, &cHi, &cLo );
   IRTemp rHi = math_PERMILPS_VAR_128( dHi, cHi );
   IRTemp rLo = math_PERMILPS_VAR_128( dLo, cLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
   return res;
}
static IRTemp math_PERMILPD_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
{
   /* No cleverness here .. */
   IRTemp dHi, dLo, cHi, cLo;
   dHi = dLo = cHi = cLo = IRTemp_INVALID;
   breakupV128to64s( dataV, &dHi, &dLo );
   breakupV128to64s( ctrlV, &cHi, &cLo );
   IRExpr* rHi
      = IRExpr_ITE( unop(Iop_64to1,
                         binop(Iop_Shr64, mkexpr(cHi), mkU8(1))),
                    mkexpr(dHi), mkexpr(dLo) );
   IRExpr* rLo
      = IRExpr_ITE( unop(Iop_64to1,
                         binop(Iop_Shr64, mkexpr(cLo), mkU8(1))),
                    mkexpr(dHi), mkexpr(dLo) );
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128, rHi, rLo));
   return res;
}
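
/* Illustrative sketch only: the double-precision variant keys each
   result lane off bit 1 of the corresponding 64-bit control lane,
   which is what the Shr64 / Iop_64to1 / ITE combination above
   implements.  Helper name is hypothetical. */
#if 0
static void ref_PERMILPD_VAR_128 ( ULong res[2],
                                   const ULong data[2], const ULong ctrl[2] )
{
   res[0] = ((ctrl[0] >> 1) & 1) ? data[1] : data[0];
   res[1] = ((ctrl[1] >> 1) & 1) ? data[1] : data[0];
}
#endif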
static IRTemp math_PERMILPD_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
{
   IRTemp dHi, dLo, cHi, cLo;
   dHi = dLo = cHi = cLo = IRTemp_INVALID;
   breakupV256toV128s( dataV, &dHi, &dLo );
   breakupV256toV128s( ctrlV, &cHi, &cLo );
   IRTemp rHi = math_PERMILPD_VAR_128( dHi, cHi );
   IRTemp rLo = math_PERMILPD_VAR_128( dLo, cLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
   return res;
}
static IRTemp math_VPERMD ( IRTemp ctrlV, IRTemp dataV )
{
   /* In the control vector, zero out all but the bottom three bits of
      each 32-bit lane. */
   IRExpr* cv1 = binop(Iop_ShrN32x8,
                       binop(Iop_ShlN32x8, mkexpr(ctrlV), mkU8(29)),
                       mkU8(29));
   /* And use the resulting cleaned-up control vector as steering
      in a Perm operation. */
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_Perm32x8, mkexpr(dataV), cv1));
   return res;
}
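
/* Illustrative sketch only: unlike VPERMILPS, VPERMD selects across the
   full 256-bit register, so each of the eight result lanes can come
   from any source lane.  Only the low three control bits per lane
   matter, hence the Shl/Shr by 29 above.  Helper name is
   hypothetical. */
#if 0
static void ref_VPERMD ( UInt res[8], const UInt ctrl[8], const UInt data[8] )
{
   for (Int i = 0; i < 8; i++)
      res[i] = data[ctrl[i] & 7];
}
#endif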
static Long dis_SHIFTX ( /*OUT*/Bool* uses_vvvv,
                         const VexAbiInfo* vbi, Prefix pfx, Long delta,
                         const HChar* opname, IROp op8 )
{
   HChar   dis_buf[50];
   Int     alen;
   Int     size = getRexW(pfx) ? 8 : 4;
   IRType  ty   = szToITy(size);
   IRTemp  src  = newTemp(ty);
   IRTemp  amt  = newTemp(ty);
   UChar   rm   = getUChar(delta);

   assign( amt, getIRegV(size,pfx) );
   if (epartIsReg(rm)) {
      assign( src, getIRegE(size,pfx,rm) );
      DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx),
          nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
      delta++;
   } else {
      IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( src, loadLE(ty, mkexpr(addr)) );
      DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx), dis_buf,
          nameIRegG(size,pfx,rm));
      delta += alen;
   }

   putIRegG( size, pfx, rm,
             binop(mkSizedOp(ty,op8), mkexpr(src),
                   narrowTo(Ity_I8, binop(mkSizedOp(ty,Iop_And8), mkexpr(amt),
                                          mkU(ty,8*size-1)))) );
   /* Flags aren't modified. */
   *uses_vvvv = True;
   return delta;
}
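
/* Illustrative sketch only: dis_SHIFTX handles the BMI2 SHLX/SHRX/SARX
   family, where the shift count comes from the vvvv register, is
   masked to the operand width, and no flags are written.  A scalar
   model of the 64-bit left-shift case, under those assumptions: */
#if 0
static ULong ref_SHLX64 ( ULong src, ULong amt )
{
   return src << (amt & 63);   /* count masked to 8*size-1; flags untouched */
}
#endif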
static Long dis_FMA ( const VexAbiInfo* vbi, Prefix pfx, Long delta, UChar opc )
{
   UChar  modrm   = getUChar(delta);
   UInt   rG      = gregOfRexRM(pfx, modrm);
   UInt   rV      = getVexNvvvv(pfx);
   Bool   scalar  = (opc & 0xF) > 7 && (opc & 1);
   IRType ty      = getRexW(pfx) ? Ity_F64 : Ity_F32;
   IRType vty     = scalar ? ty : (getVexL(pfx) ? Ity_V256 : Ity_V128);
   IRTemp addr    = IRTemp_INVALID;
   HChar  dis_buf[50];
   Int    alen    = 0;
   const HChar *name;
   const HChar *suffix;
   const HChar *order;
   Bool   negateRes   = False;
   Bool   negateZeven = False;
   Bool   negateZodd  = False;
   UInt   count       = 0;

   switch (opc & 0xF) {
   case 0x6:  name = "addsub"; negateZeven = True; break;
   case 0x7:  name = "subadd"; negateZodd = True; break;
   case 0x8:
   case 0x9:  name = "add"; break;
   case 0xA:
   case 0xB:  name = "sub"; negateZeven = True; negateZodd = True;
              break;
   case 0xC:
   case 0xD:  name = "add"; negateRes = True; negateZeven = True;
              negateZodd = True; break;
   case 0xE:
   case 0xF:  name = "sub"; negateRes = True; break;
   default:   vpanic("dis_FMA(amd64)"); break;
   }
   switch (opc & 0xF0) {
   case 0x90: order = "132"; break;
   case 0xA0: order = "213"; break;
   case 0xB0: order = "231"; break;
   default: vpanic("dis_FMA(amd64)"); break;
   }
   if (scalar) {
      suffix = ty == Ity_F64 ? "sd" : "ss";
   } else {
      suffix = ty == Ity_F64 ? "pd" : "ps";
   }

   // Figure out |count| (the number of elements) by considering |vty| and |ty|.
   count = sizeofIRType(vty) / sizeofIRType(ty);
   vassert(count == 1 || count == 2 || count == 4 || count == 8);

   // Fetch operands into the first |count| elements of |sX|, |sY| and |sZ|.
   UInt i;
   IRExpr *sX[8], *sY[8], *sZ[8], *res[8];
   for (i = 0; i < 8; i++) sX[i] = sY[i] = sZ[i] = res[i] = NULL;

   IRExpr* (*getYMMRegLane)(UInt,Int)
      = ty == Ity_F32 ? getYMMRegLane32F : getYMMRegLane64F;
   void (*putYMMRegLane)(UInt,Int,IRExpr*)
      = ty == Ity_F32 ? putYMMRegLane32F : putYMMRegLane64F;

   for (i = 0; i < count; i++) {
      sX[i] = getYMMRegLane(rG, i);
      sZ[i] = getYMMRegLane(rV, i);
   }

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx, modrm);
      delta++;
      for (i = 0; i < count; i++) {
         sY[i] = getYMMRegLane(rE, i);
      }
      if (vty == Ity_V256) {
         DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
             name, order, suffix, nameYMMReg(rE), nameYMMReg(rV),
             nameYMMReg(rG));
      } else {
         DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
             name, order, suffix, nameXMMReg(rE), nameXMMReg(rV),
             nameXMMReg(rG));
      }
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      delta += alen;
      for (i = 0; i < count; i++) {
         sY[i] = loadLE(ty, binop(Iop_Add64, mkexpr(addr),
                                  mkU64(i * sizeofIRType(ty))));
      }
      if (vty == Ity_V256) {
         DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
             name, order, suffix, dis_buf, nameYMMReg(rV),
             nameYMMReg(rG));
      } else {
         DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
             name, order, suffix, dis_buf, nameXMMReg(rV),
             nameXMMReg(rG));
      }
   }

   /* vX/vY/vZ are now in 132 order.  If the instruction requires a different
      order, swap them around. */

#  define COPY_ARR(_dst, _src) \
      do { for (int j = 0; j < 8; j++) { _dst[j] = _src[j]; } } while (0)

   if ((opc & 0xF0) != 0x90) {
      IRExpr* temp[8];
      COPY_ARR(temp, sX);
      if ((opc & 0xF0) == 0xA0) {
         COPY_ARR(sX, sZ);
         COPY_ARR(sZ, sY);
         COPY_ARR(sY, temp);
      } else {
         COPY_ARR(sX, sZ);
         COPY_ARR(sZ, temp);
      }
   }

#  undef COPY_ARR

   for (i = 0; i < count; i++) {
      IROp opNEG = ty == Ity_F64 ? Iop_NegF64 : Iop_NegF32;
      if ((i & 1) ? negateZodd : negateZeven) {
         sZ[i] = unop(opNEG, sZ[i]);
      }
      res[i] = IRExpr_Qop(ty == Ity_F64 ? Iop_MAddF64 : Iop_MAddF32,
                          get_FAKE_roundingmode(), sX[i], sY[i], sZ[i]);
      if (negateRes) {
         res[i] = unop(opNEG, res[i]);
      }
   }

   for (i = 0; i < count; i++) {
      putYMMRegLane(rG, i, res[i]);
   }

   switch (vty) {
      case Ity_F32:  putYMMRegLane32(rG, 1, mkU32(0));   /*fallthru*/
      case Ity_F64:  putYMMRegLane64(rG, 1, mkU64(0));   /*fallthru*/
      case Ity_V128: putYMMRegLane128(rG, 1, mkV128(0)); /*fallthru*/
      case Ity_V256: break;
      default: vassert(0);
   }

   return delta;
}
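
/* Illustrative sketch only: the 132/213/231 suffix says which of the
   three operands (dst, vvvv, r/m) multiply and which one adds.  With
   sX = dst, sY = r/m and sZ = vvvv as fetched above ("132" order),
   the COPY_ARR rearrangement keeps the computation in the single form
   res = sX*sY + sZ.  A scalar model, assuming FMA132 semantics
   res = dst*mem + vvvv: */
#if 0
static double ref_FMADD132SD ( double dst, double vvvv, double mem )
{
   return dst * mem + vvvv;   /* 213: vvvv*dst + mem;  231: vvvv*mem + dst */
}
#endif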
/* Masked load or masked store. */
static ULong dis_VMASKMOV ( Bool *uses_vvvv, const VexAbiInfo* vbi,
                            Prefix pfx, Long delta,
                            const HChar* opname, Bool isYMM, IRType ty,
                            Bool isLoad )
{
   HChar   dis_buf[50];
   Int     alen, i;
   IRTemp  addr;
   UChar   modrm = getUChar(delta);
   UInt    rG    = gregOfRexRM(pfx,modrm);
   UInt    rV    = getVexNvvvv(pfx);

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;

   /**/ if (isLoad && isYMM) {
      DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
   }
   else if (isLoad && !isYMM) {
      DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
   }
   else if (!isLoad && isYMM) {
      DIP("%s %s,%s,%s\n", opname, nameYMMReg(rG), nameYMMReg(rV), dis_buf );
   }
   else {
      vassert(!isLoad && !isYMM);
      DIP("%s %s,%s,%s\n", opname, nameXMMReg(rG), nameXMMReg(rV), dis_buf );
   }

   vassert(ty == Ity_I32 || ty == Ity_I64);
   Bool laneIs32 = ty == Ity_I32;

   Int nLanes = (isYMM ? 2 : 1) * (laneIs32 ? 4 : 2);

   for (i = 0; i < nLanes; i++) {
      IRExpr* shAmt = laneIs32 ? mkU8(31)    : mkU8(63);
      IRExpr* one   = laneIs32 ? mkU32(1)    : mkU64(1);
      IROp    opSHR = laneIs32 ? Iop_Shr32   : Iop_Shr64;
      IROp    opEQ  = laneIs32 ? Iop_CmpEQ32 : Iop_CmpEQ64;
      IRExpr* lane  = (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rV, i );

      IRTemp  cond = newTemp(Ity_I1);
      assign(cond, binop(opEQ, binop(opSHR, lane, shAmt), one));

      IRTemp  data = newTemp(ty);
      IRExpr* ea   = binop(Iop_Add64, mkexpr(addr),
                           mkU64(i * (laneIs32 ? 4 : 8)));
      if (isLoad) {
         stmt( IRStmt_LoadG(
                  Iend_LE, laneIs32 ? ILGop_Ident32 : ILGop_Ident64,
                  data, ea, laneIs32 ? mkU32(0) : mkU64(0), mkexpr(cond)
               ));
         (laneIs32 ? putYMMRegLane32 : putYMMRegLane64)( rG, i, mkexpr(data) );
      } else {
         assign(data, (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rG, i ));
         stmt( IRStmt_StoreG(Iend_LE, ea, mkexpr(data), mkexpr(cond)) );
      }
   }

   if (isLoad && !isYMM)
      putYMMRegLane128( rG, 1, mkV128(0) );

   *uses_vvvv = True;
   return delta;
}
static ULong dis_VGATHER ( Bool *uses_vvvv, const VexAbiInfo* vbi,
                           Prefix pfx, Long delta,
                           const HChar* opname, Bool isYMM,
                           Bool isVM64x, IRType ty )
{
   HChar  dis_buf[50];
   Int    alen, i, vscale, count1, count2;
   IRTemp addr;
   IRTemp cond;
   UInt   rI;
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   UInt   rV    = getVexNvvvv(pfx);

   IRType dstTy = (isYMM && (ty == Ity_I64 || !isVM64x)) ? Ity_V256 : Ity_V128;
   IRType idxTy = (isYMM && (ty == Ity_I32 || isVM64x)) ? Ity_V256 : Ity_V128;

   addr = disAVSIBMode ( &alen, vbi, pfx, delta, dis_buf, &rI,
                         idxTy, &vscale );
   if (addr == IRTemp_INVALID || rI == rG || rI == rV || rG == rV)
      return delta;
   if (dstTy == Ity_V256) {
      DIP("%s %s,%s,%s\n", opname, nameYMMReg(rV), dis_buf, nameYMMReg(rG) );
   } else {
      DIP("%s %s,%s,%s\n", opname, nameXMMReg(rV), dis_buf, nameXMMReg(rG) );
   }
   delta += alen;

   if (ty == Ity_I32) {
      count1 = isYMM ? 8 : 4;
      count2 = isVM64x ? count1 / 2 : count1;
   } else {
      count1 = count2 = isYMM ? 4 : 2;
   }

   /* First update the mask register to copies of the sign bit. */
   if (ty == Ity_I32) {
      if (isYMM)
         putYMMReg( rV, binop(Iop_SarN32x8, getYMMReg( rV ), mkU8(31)) );
      else
         putYMMRegLoAndZU( rV, binop(Iop_SarN32x4, getXMMReg( rV ), mkU8(31)) );
   } else {
      for (i = 0; i < count1; i++) {
         putYMMRegLane64( rV, i, binop(Iop_Sar64, getYMMRegLane64( rV, i ),
                                       mkU8(63)) );
      }
   }

   /* Next gather the individual elements.  If any fault occurs, the
      corresponding mask element will be set and the loop stops. */
   for (i = 0; i < count2; i++) {
      IRExpr *expr, *addr_expr;
      cond = newTemp(Ity_I1);
      assign( cond,
              binop(ty == Ity_I32 ? Iop_CmpLT32S : Iop_CmpLT64S,
                    ty == Ity_I32 ? getYMMRegLane32( rV, i )
                                  : getYMMRegLane64( rV, i ),
                    mkU(ty, 0)) );
      expr = ty == Ity_I32 ? getYMMRegLane32( rG, i )
                           : getYMMRegLane64( rG, i );
      addr_expr = isVM64x ? getYMMRegLane64( rI, i )
                          : unop(Iop_32Sto64, getYMMRegLane32( rI, i ));
      switch (vscale) {
         case 2: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(1)); break;
         case 4: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(2)); break;
         case 8: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(3)); break;
         default: break;
      }
      addr_expr = binop(Iop_Add64, mkexpr(addr), addr_expr);
      addr_expr = handleAddrOverrides(vbi, pfx, addr_expr);
      addr_expr = IRExpr_ITE(mkexpr(cond), addr_expr, getIReg64(R_RSP));
      expr = IRExpr_ITE(mkexpr(cond), loadLE(ty, addr_expr), expr);
      if (ty == Ity_I32) {
         putYMMRegLane32( rG, i, expr );
         putYMMRegLane32( rV, i, mkU32(0) );
      } else {
         putYMMRegLane64( rG, i, expr );
         putYMMRegLane64( rV, i, mkU64(0) );
      }
   }

   if (!isYMM || (ty == Ity_I32 && isVM64x)) {
      if (ty == Ity_I64 || isYMM)
         putYMMRegLane128( rV, 1, mkV128(0) );
      else if (ty == Ity_I32 && count2 == 2) {
         putYMMRegLane64( rV, 1, mkU64(0) );
         putYMMRegLane64( rG, 1, mkU64(0) );
      }
      putYMMRegLane128( rG, 1, mkV128(0) );
   }

   *uses_vvvv = True;
   return delta;
}
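
/* Illustrative sketch only: VPGATHERDD-style semantics for one element,
   assuming a 32-bit data lane and a 32-bit index.  Only lanes whose
   mask sign bit is set are loaded, and each loaded lane clears its mask
   lane, which is why a faulting element leaves a restartable partial
   state.  Helper name is hypothetical. */
#if 0
static void ref_gather_lane32 ( UInt* dst, UInt* mask,
                                const UChar* base, Int index, Int scale )
{
   if ((Int)*mask < 0) {
      *dst  = *(const UInt*)(base + (Long)index * scale);
      *mask = 0;
   }
}
#endif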
__attribute__((noinline))
static
Long dis_ESC_0F38__VEX (
        /*MB_OUT*/DisResult* dres,
        /*OUT*/   Bool*      uses_vvvv,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  vbi,
        Prefix pfx, Int sz, Long deltaIN
     )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
   *uses_vvvv = False;
      /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
      /* VPSHUFB = VEX.NDS.128.66.0F38.WIG 00 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_XMM );
         goto decode_success;
      }
      /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
      /* VPSHUFB = VEX.NDS.256.66.0F38.WIG 00 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_YMM );
         goto decode_success;
      }

      /* VPHADDW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 01 /r */
      /* VPHADDD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 02 /r */
      /* VPHADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 03 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VPHADDW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 01 /r */
      /* VPHADDD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 02 /r */
      /* VPHADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 03 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PHADD_256( vbi, pfx, delta, opc );
         *uses_vvvv = True;
         goto decode_success;
      }

      /* VPMADDUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 04 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
                    math_PMADDUBSW_128 );
         goto decode_success;
      }
      /* VPMADDUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 04 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
                    math_PMADDUBSW_256 );
         goto decode_success;
      }

      /* VPHSUBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 05 /r */
      /* VPHSUBD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 06 /r */
      /* VPHSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 07 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VPHSUBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 05 /r */
      /* VPHSUBD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 06 /r */
      /* VPHSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 07 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PHADD_256( vbi, pfx, delta, opc );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VPSIGNB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 08 /r */
      /* VPSIGNW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 09 /r */
      /* VPSIGND xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0A /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         IRTemp sV      = newTemp(Ity_V128);
         IRTemp dV      = newTemp(Ity_V128);
         IRTemp sHi, sLo, dHi, dLo;
         sHi = sLo = dHi = dLo = IRTemp_INVALID;
         UChar  ch      = '?';
         Int    laneszB = 0;
         UChar  modrm   = getUChar(delta);
         UInt   rG      = gregOfRexRM(pfx,modrm);
         UInt   rV      = getVexNvvvv(pfx);

         switch (opc) {
            case 0x08: laneszB = 1; ch = 'b'; break;
            case 0x09: laneszB = 2; ch = 'w'; break;
            case 0x0A: laneszB = 4; ch = 'd'; break;
            default: vassert(0);
         }

         assign( dV, getXMMReg(rV) );

         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            delta += 1;
            DIP("vpsign%c %s,%s,%s\n", ch, nameXMMReg(rE),
                nameXMMReg(rV), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
                nameXMMReg(rV), nameXMMReg(rG));
         }

         breakupV128to64s( dV, &dHi, &dLo );
         breakupV128to64s( sV, &sHi, &sLo );

         putYMMRegLoAndZU(
            rG,
            binop(Iop_64HLtoV128,
                  dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
                  dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
            )
         );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VPSIGNB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 08 /r */
      /* VPSIGNW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 09 /r */
      /* VPSIGND ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0A /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         IRTemp sV      = newTemp(Ity_V256);
         IRTemp dV      = newTemp(Ity_V256);
         IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
         s3 = s2 = s1 = s0 = IRTemp_INVALID;
         d3 = d2 = d1 = d0 = IRTemp_INVALID;
         UChar  ch      = '?';
         Int    laneszB = 0;
         UChar  modrm   = getUChar(delta);
         UInt   rG      = gregOfRexRM(pfx,modrm);
         UInt   rV      = getVexNvvvv(pfx);

         switch (opc) {
            case 0x08: laneszB = 1; ch = 'b'; break;
            case 0x09: laneszB = 2; ch = 'w'; break;
            case 0x0A: laneszB = 4; ch = 'd'; break;
            default: vassert(0);
         }

         assign( dV, getYMMReg(rV) );

         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getYMMReg(rE) );
            delta += 1;
            DIP("vpsign%c %s,%s,%s\n", ch, nameYMMReg(rE),
                nameYMMReg(rV), nameYMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
            delta += alen;
            DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
                nameYMMReg(rV), nameYMMReg(rG));
         }

         breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
         breakupV256to64s( sV, &s3, &s2, &s1, &s0 );

         putYMMReg(
            rG,
            binop( Iop_V128HLtoV256,
                   binop(Iop_64HLtoV128,
                         dis_PSIGN_helper( mkexpr(s3), mkexpr(d3), laneszB ),
                         dis_PSIGN_helper( mkexpr(s2), mkexpr(d2), laneszB )
                   ),
                   binop(Iop_64HLtoV128,
                         dis_PSIGN_helper( mkexpr(s1), mkexpr(d1), laneszB ),
                         dis_PSIGN_helper( mkexpr(s0), mkexpr(d0), laneszB )
                   )
            )
         );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VPMULHRSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0B /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         IRTemp sV    = newTemp(Ity_V128);
         IRTemp dV    = newTemp(Ity_V128);
         IRTemp sHi, sLo, dHi, dLo;
         sHi = sLo = dHi = dLo = IRTemp_INVALID;
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx,modrm);
         UInt   rV    = getVexNvvvv(pfx);

         assign( dV, getXMMReg(rV) );

         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            delta += 1;
            DIP("vpmulhrsw %s,%s,%s\n", nameXMMReg(rE),
                nameXMMReg(rV), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
                nameXMMReg(rV), nameXMMReg(rG));
         }

         breakupV128to64s( dV, &dHi, &dLo );
         breakupV128to64s( sV, &sHi, &sLo );

         putYMMRegLoAndZU(
            rG,
            binop(Iop_64HLtoV128,
                  dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
                  dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
            )
         );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VPMULHRSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0B /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         IRTemp sV    = newTemp(Ity_V256);
         IRTemp dV    = newTemp(Ity_V256);
         IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
         s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx,modrm);
         UInt   rV    = getVexNvvvv(pfx);

         assign( dV, getYMMReg(rV) );

         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getYMMReg(rE) );
            delta += 1;
            DIP("vpmulhrsw %s,%s,%s\n", nameYMMReg(rE),
                nameYMMReg(rV), nameYMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
            delta += alen;
            DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
                nameYMMReg(rV), nameYMMReg(rG));
         }

         breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
         breakupV256to64s( sV, &s3, &s2, &s1, &s0 );

         putYMMReg(
            rG,
            binop(Iop_V128HLtoV256,
                  binop(Iop_64HLtoV128,
                        dis_PMULHRSW_helper( mkexpr(s3), mkexpr(d3) ),
                        dis_PMULHRSW_helper( mkexpr(s2), mkexpr(d2) ) ),
                  binop(Iop_64HLtoV128,
                        dis_PMULHRSW_helper( mkexpr(s1), mkexpr(d1) ),
                        dis_PMULHRSW_helper( mkexpr(s0), mkexpr(d0) ) )
            )
         );
         *uses_vvvv = True;
         dres->hint = Dis_HintVerbose;
         goto decode_success;
      }
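
      /* Illustrative sketch only (not part of the decoder):
         dis_PMULHRSW_helper models the per-lane PMULHRSW rounding
         multiply, i.e. for each signed 16-bit pair the result is
         ((a*b >> 14) + 1) >> 1, of which the low 16 bits are kept: */
#if 0
      {
         Short a = 0, b = 0;
         Short r = (Short)(((((Int)a * (Int)b) >> 14) + 1) >> 1);
         (void)r;
      }
#endif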
      /* VPERMILPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0C /r */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp ctrlV = newTemp(Ity_V128);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta += 1;
            DIP("vpermilps %s,%s,%s\n",
                nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
            assign(ctrlV, getXMMReg(rE));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            DIP("vpermilps %s,%s,%s\n",
                dis_buf, nameXMMReg(rV), nameXMMReg(rG));
            assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
         }
         IRTemp dataV = newTemp(Ity_V128);
         assign(dataV, getXMMReg(rV));
         IRTemp resV = math_PERMILPS_VAR_128(dataV, ctrlV);
         putYMMRegLoAndZU(rG, mkexpr(resV));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VPERMILPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0C /r */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp ctrlV = newTemp(Ity_V256);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta += 1;
            DIP("vpermilps %s,%s,%s\n",
                nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
            assign(ctrlV, getYMMReg(rE));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            DIP("vpermilps %s,%s,%s\n",
                dis_buf, nameYMMReg(rV), nameYMMReg(rG));
            assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
         }
         IRTemp dataV = newTemp(Ity_V256);
         assign(dataV, getYMMReg(rV));
         IRTemp resV = math_PERMILPS_VAR_256(dataV, ctrlV);
         putYMMReg(rG, mkexpr(resV));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VPERMILPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0D /r */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp ctrlV = newTemp(Ity_V128);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta += 1;
            DIP("vpermilpd %s,%s,%s\n",
                nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
            assign(ctrlV, getXMMReg(rE));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            DIP("vpermilpd %s,%s,%s\n",
                dis_buf, nameXMMReg(rV), nameXMMReg(rG));
            assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
         }
         IRTemp dataV = newTemp(Ity_V128);
         assign(dataV, getXMMReg(rV));
         IRTemp resV = math_PERMILPD_VAR_128(dataV, ctrlV);
         putYMMRegLoAndZU(rG, mkexpr(resV));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VPERMILPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0D /r */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp ctrlV = newTemp(Ity_V256);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta += 1;
            DIP("vpermilpd %s,%s,%s\n",
                nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
            assign(ctrlV, getYMMReg(rE));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            DIP("vpermilpd %s,%s,%s\n",
                dis_buf, nameYMMReg(rV), nameYMMReg(rG));
            assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
         }
         IRTemp dataV = newTemp(Ity_V256);
         assign(dataV, getYMMReg(rV));
         IRTemp resV = math_PERMILPD_VAR_256(dataV, ctrlV);
         putYMMReg(rG, mkexpr(resV));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VTESTPS xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0E /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 32 );
         goto decode_success;
      }
      /* VTESTPS ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0E /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_xTESTy_256( vbi, pfx, delta, 32 );
         goto decode_success;
      }

      /* VTESTPD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0F /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 64 );
         goto decode_success;
      }
      /* VTESTPD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0F /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_xTESTy_256( vbi, pfx, delta, 64 );
         goto decode_success;
      }

      /* VCVTPH2PS xmm2/m64, xmm1 = VEX.128.66.0F38.W0 13 /r */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/
          && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
         delta = dis_VCVTPH2PS( vbi, pfx, delta, /*is256bit=*/False );
         goto decode_success;
      }
      /* VCVTPH2PS xmm2/m128, ymm1 = VEX.256.66.0F38.W0 13 /r */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/
          && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
         delta = dis_VCVTPH2PS( vbi, pfx, delta, /*is256bit=*/True );
         goto decode_success;
      }

      /* VPERMPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 16 /r */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpermps", math_VPERMD );
         goto decode_success;
      }

      /* VPTEST xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 17 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 0 );
         goto decode_success;
      }
      /* VPTEST ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 17 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_xTESTy_256( vbi, pfx, delta, 0 );
         goto decode_success;
      }
      /* VBROADCASTSS m32, xmm1 = VEX.128.66.0F38.WIG 18 /r */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/
          && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         DIP("vbroadcastss %s,%s\n", dis_buf, nameXMMReg(rG));
         IRTemp t32 = newTemp(Ity_I32);
         assign(t32, loadLE(Ity_I32, mkexpr(addr)));
         IRTemp t64 = newTemp(Ity_I64);
         assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
         IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
         putYMMRegLoAndZU(rG, res);
         goto decode_success;
      }
      /* VBROADCASTSS m32, ymm1 = VEX.256.66.0F38.WIG 18 /r */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/
          && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         DIP("vbroadcastss %s,%s\n", dis_buf, nameYMMReg(rG));
         IRTemp t32 = newTemp(Ity_I32);
         assign(t32, loadLE(Ity_I32, mkexpr(addr)));
         IRTemp t64 = newTemp(Ity_I64);
         assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
         IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
                                  mkexpr(t64), mkexpr(t64));
         putYMMReg(rG, res);
         goto decode_success;
      }
      /* VBROADCASTSS xmm2, xmm1 = VEX.128.66.0F38.WIG 18 /r */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/
          && epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         UInt  rE    = eregOfRexRM(pfx, modrm);
         DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
         IRTemp t32 = newTemp(Ity_I32);
         assign(t32, getXMMRegLane32(rE, 0));
         IRTemp t64 = newTemp(Ity_I64);
         assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
         IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
         putYMMRegLoAndZU(rG, res);
         delta++;
         goto decode_success;
      }
      /* VBROADCASTSS xmm2, ymm1 = VEX.256.66.0F38.WIG 18 /r */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/
          && epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         UInt  rE    = eregOfRexRM(pfx, modrm);
         DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
         IRTemp t32 = newTemp(Ity_I32);
         assign(t32, getXMMRegLane32(rE, 0));
         IRTemp t64 = newTemp(Ity_I64);
         assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
         IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
                                  mkexpr(t64), mkexpr(t64));
         putYMMReg(rG, res);
         delta++;
         goto decode_success;
      }

      /* VBROADCASTSD m64, ymm1 = VEX.256.66.0F38.WIG 19 /r */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/
          && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         DIP("vbroadcastsd %s,%s\n", dis_buf, nameYMMReg(rG));
         IRTemp t64 = newTemp(Ity_I64);
         assign(t64, loadLE(Ity_I64, mkexpr(addr)));
         IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
                                  mkexpr(t64), mkexpr(t64));
         putYMMReg(rG, res);
         goto decode_success;
      }
      /* VBROADCASTSD xmm2, ymm1 = VEX.256.66.0F38.WIG 19 /r */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/
          && epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         UInt  rE    = eregOfRexRM(pfx, modrm);
         DIP("vbroadcastsd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
         IRTemp t64 = newTemp(Ity_I64);
         assign(t64, getXMMRegLane64(rE, 0));
         IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
                                  mkexpr(t64), mkexpr(t64));
         putYMMReg(rG, res);
         delta++;
         goto decode_success;
      }

      /* VBROADCASTF128 m128, ymm1 = VEX.256.66.0F38.WIG 1A /r */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/
          && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         DIP("vbroadcastf128 %s,%s\n", dis_buf, nameYMMReg(rG));
         IRTemp t128 = newTemp(Ity_V128);
         assign(t128, loadLE(Ity_V128, mkexpr(addr)));
         putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
         goto decode_success;
      }
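
      /* Illustrative sketch only: every VBROADCAST form above reduces
         to replicating one element across all destination lanes; the
         IR builds this by pairing 32-bit values into 64 bits
         (Iop_32HLto64) and replicating with Iop_64HLtoV128 or
         Iop_64x4toV256.  Scalar picture for the m32 -> ymm case: */
#if 0
      {
         UInt src = 0, dst[8];
         for (Int i = 0; i < 8; i++) dst[i] = src;   /* dst = broadcast(src) */
      }
#endif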
      /* VPABSB xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1C /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_to_G_unary(
                    uses_vvvv, vbi, pfx, delta,
                    "vpabsb", math_PABS_XMM_pap1 );
         goto decode_success;
      }
      /* VPABSB ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1C /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_to_G_unary(
                    uses_vvvv, vbi, pfx, delta,
                    "vpabsb", math_PABS_YMM_pap1 );
         goto decode_success;
      }

      /* VPABSW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1D /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_to_G_unary(
                    uses_vvvv, vbi, pfx, delta,
                    "vpabsw", math_PABS_XMM_pap2 );
         goto decode_success;
      }
      /* VPABSW ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1D /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_to_G_unary(
                    uses_vvvv, vbi, pfx, delta,
                    "vpabsw", math_PABS_YMM_pap2 );
         goto decode_success;
      }

      /* VPABSD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1E /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_to_G_unary(
                    uses_vvvv, vbi, pfx, delta,
                    "vpabsd", math_PABS_XMM_pap4 );
         goto decode_success;
      }
      /* VPABSD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1E /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_to_G_unary(
                    uses_vvvv, vbi, pfx, delta,
                    "vpabsd", math_PABS_YMM_pap4 );
         goto decode_success;
      }
      /* VPMOVSXBW xmm2/m64, xmm1 */
      /* VPMOVSXBW = VEX.128.66.0F38.WIG 20 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PMOVxXBW_128( vbi, pfx, delta,
                                   True/*isAvx*/, False/*!xIsZ*/ );
         goto decode_success;
      }
      /* VPMOVSXBW xmm2/m128, ymm1 */
      /* VPMOVSXBW = VEX.256.66.0F38.WIG 20 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PMOVxXBW_256( vbi, pfx, delta, False/*!xIsZ*/ );
         goto decode_success;
      }

      /* VPMOVSXBD xmm2/m32, xmm1 */
      /* VPMOVSXBD = VEX.128.66.0F38.WIG 21 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PMOVxXBD_128( vbi, pfx, delta,
                                   True/*isAvx*/, False/*!xIsZ*/ );
         goto decode_success;
      }
      /* VPMOVSXBD xmm2/m64, ymm1 */
      /* VPMOVSXBD = VEX.256.66.0F38.WIG 21 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PMOVxXBD_256( vbi, pfx, delta, False/*!xIsZ*/ );
         goto decode_success;
      }

      /* VPMOVSXBQ xmm2/m16, xmm1 */
      /* VPMOVSXBQ = VEX.128.66.0F38.WIG 22 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PMOVSXBQ_128( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VPMOVSXBQ xmm2/m32, ymm1 */
      /* VPMOVSXBQ = VEX.256.66.0F38.WIG 22 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PMOVSXBQ_256( vbi, pfx, delta );
         goto decode_success;
      }

      /* VPMOVSXWD xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 23 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PMOVxXWD_128( vbi, pfx, delta,
                                   True/*isAvx*/, False/*!xIsZ*/ );
         goto decode_success;
      }
      /* VPMOVSXWD xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 23 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PMOVxXWD_256( vbi, pfx, delta, False/*!xIsZ*/ );
         goto decode_success;
      }

      /* VPMOVSXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 24 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PMOVSXWQ_128( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VPMOVSXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 24 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PMOVSXWQ_256( vbi, pfx, delta );
         goto decode_success;
      }

      /* VPMOVSXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 25 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
                                   True/*isAvx*/, False/*!xIsZ*/ );
         goto decode_success;
      }
      /* VPMOVSXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 25 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PMOVxXDQ_256( vbi, pfx, delta, False/*!xIsZ*/ );
         goto decode_success;
      }
      /* VPMULDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 28 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta,
                    "vpmuldq", math_PMULDQ_128 );
         goto decode_success;
      }
      /* VPMULDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 28 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta,
                    "vpmuldq", math_PMULDQ_256 );
         goto decode_success;
      }

      /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
      /* VPCMPEQQ = VEX.NDS.128.66.0F38.WIG 29 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x2 );
         goto decode_success;
      }
      /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
      /* VPCMPEQQ = VEX.NDS.256.66.0F38.WIG 29 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x4 );
         goto decode_success;
      }

      /* VMOVNTDQA m128, xmm1 = VEX.128.66.0F38.WIG 2A /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && !epartIsReg(getUChar(delta))) {
         UChar  modrm = getUChar(delta);
         UInt   rD    = gregOfRexRM(pfx, modrm);
         IRTemp tD    = newTemp(Ity_V128);
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         gen_SIGNAL_if_not_16_aligned(vbi, addr);
         assign(tD, loadLE(Ity_V128, mkexpr(addr)));
         DIP("vmovntdqa %s,%s\n", dis_buf, nameXMMReg(rD));
         putYMMRegLoAndZU(rD, mkexpr(tD));
         goto decode_success;
      }
      /* VMOVNTDQA m256, ymm1 = VEX.256.66.0F38.WIG 2A /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && !epartIsReg(getUChar(delta))) {
         UChar  modrm = getUChar(delta);
         UInt   rD    = gregOfRexRM(pfx, modrm);
         IRTemp tD    = newTemp(Ity_V256);
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         gen_SIGNAL_if_not_32_aligned(vbi, addr);
         assign(tD, loadLE(Ity_V256, mkexpr(addr)));
         DIP("vmovntdqa %s,%s\n", dis_buf, nameYMMReg(rD));
         putYMMReg(rD, mkexpr(tD));
         goto decode_success;
      }

      /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
      /* VPACKUSDW = VEX.NDS.128.66.0F38.WIG 2B /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpackusdw",
                    Iop_QNarrowBin32Sto16Ux8, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
      /* VPACKUSDW = VEX.NDS.256.66.0F38.WIG 2B /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpackusdw",
                    math_VPACKUSDW_YMM );
         goto decode_success;
      }
      /* VMASKMOVPS m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2C /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0==getRexW(pfx)/*W0*/
          && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
                               /*!isYMM*/False, Ity_I32, /*isLoad*/True );
         goto decode_success;
      }
      /* VMASKMOVPS m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2C /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0==getRexW(pfx)/*W0*/
          && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
                               /*isYMM*/True, Ity_I32, /*isLoad*/True );
         goto decode_success;
      }

      /* VMASKMOVPD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2D /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0==getRexW(pfx)/*W0*/
          && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
                               /*!isYMM*/False, Ity_I64, /*isLoad*/True );
         goto decode_success;
      }
      /* VMASKMOVPD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2D /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0==getRexW(pfx)/*W0*/
          && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
                               /*isYMM*/True, Ity_I64, /*isLoad*/True );
         goto decode_success;
      }

      /* VMASKMOVPS xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2E /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0==getRexW(pfx)/*W0*/
          && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
                               /*!isYMM*/False, Ity_I32, /*!isLoad*/False );
         goto decode_success;
      }
      /* VMASKMOVPS ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2E /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0==getRexW(pfx)/*W0*/
          && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
                               /*isYMM*/True, Ity_I32, /*!isLoad*/False );
         goto decode_success;
      }

      /* VMASKMOVPD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2F /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0==getRexW(pfx)/*W0*/
          && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
                               /*!isYMM*/False, Ity_I64, /*!isLoad*/False );
         goto decode_success;
      }
      /* VMASKMOVPD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2F /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0==getRexW(pfx)/*W0*/
          && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
                               /*isYMM*/True, Ity_I64, /*!isLoad*/False );
         goto decode_success;
      }
      /* VPMOVZXBW xmm2/m64, xmm1 */
      /* VPMOVZXBW = VEX.128.66.0F38.WIG 30 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PMOVxXBW_128( vbi, pfx, delta,
                                   True/*isAvx*/, True/*xIsZ*/ );
         goto decode_success;
      }
      /* VPMOVZXBW xmm2/m128, ymm1 */
      /* VPMOVZXBW = VEX.256.66.0F38.WIG 30 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PMOVxXBW_256( vbi, pfx, delta, True/*xIsZ*/ );
         goto decode_success;
      }

      /* VPMOVZXBD xmm2/m32, xmm1 */
      /* VPMOVZXBD = VEX.128.66.0F38.WIG 31 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PMOVxXBD_128( vbi, pfx, delta,
                                   True/*isAvx*/, True/*xIsZ*/ );
         goto decode_success;
      }
      /* VPMOVZXBD xmm2/m64, ymm1 */
      /* VPMOVZXBD = VEX.256.66.0F38.WIG 31 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PMOVxXBD_256( vbi, pfx, delta, True/*xIsZ*/ );
         goto decode_success;
      }

      /* VPMOVZXBQ xmm2/m16, xmm1 */
      /* VPMOVZXBQ = VEX.128.66.0F38.WIG 32 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PMOVZXBQ_128( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VPMOVZXBQ xmm2/m32, ymm1 */
      /* VPMOVZXBQ = VEX.256.66.0F38.WIG 32 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PMOVZXBQ_256( vbi, pfx, delta );
         goto decode_success;
      }

      /* VPMOVZXWD xmm2/m64, xmm1 */
      /* VPMOVZXWD = VEX.128.66.0F38.WIG 33 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PMOVxXWD_128( vbi, pfx, delta,
                                   True/*isAvx*/, True/*xIsZ*/ );
         goto decode_success;
      }
      /* VPMOVZXWD xmm2/m128, ymm1 */
      /* VPMOVZXWD = VEX.256.66.0F38.WIG 33 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PMOVxXWD_256( vbi, pfx, delta, True/*xIsZ*/ );
         goto decode_success;
      }

      /* VPMOVZXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 34 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PMOVZXWQ_128( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VPMOVZXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 34 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PMOVZXWQ_256( vbi, pfx, delta );
         goto decode_success;
      }

      /* VPMOVZXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 35 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
                                   True/*isAvx*/, True/*xIsZ*/ );
         goto decode_success;
      }
      /* VPMOVZXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 35 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PMOVxXDQ_256( vbi, pfx, delta, True/*xIsZ*/ );
         goto decode_success;
      }
29164 /* VPERMD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 36 /r */
29165 if (have66noF2noF3(pfx
)
29166 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
29167 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
29168 uses_vvvv
, vbi
, pfx
, delta
, "vpermd", math_VPERMD
);
29169 goto decode_success
;
29174 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
29175 /* VPCMPGTQ = VEX.NDS.128.66.0F38.WIG 37 /r */
29176 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29177 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29178 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtq", Iop_CmpGT64Sx2
);
29179 goto decode_success
;
29181 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
29182 /* VPCMPGTQ = VEX.NDS.256.66.0F38.WIG 37 /r */
29183 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29184 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29185 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtq", Iop_CmpGT64Sx4
);
29186 goto decode_success
;
29191 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
29192 /* VPMINSB = VEX.NDS.128.66.0F38.WIG 38 /r */
29193 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29194 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29195 uses_vvvv
, vbi
, pfx
, delta
, "vpminsb", Iop_Min8Sx16
);
29196 goto decode_success
;
29198 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
29199 /* VPMINSB = VEX.NDS.256.66.0F38.WIG 38 /r */
29200 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29201 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29202 uses_vvvv
, vbi
, pfx
, delta
, "vpminsb", Iop_Min8Sx32
);
29203 goto decode_success
;
29208 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
29209 /* VPMINSD = VEX.NDS.128.66.0F38.WIG 39 /r */
29210 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29211 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29212 uses_vvvv
, vbi
, pfx
, delta
, "vpminsd", Iop_Min32Sx4
);
29213 goto decode_success
;
29215 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
29216 /* VPMINSD = VEX.NDS.256.66.0F38.WIG 39 /r */
29217 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29218 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29219 uses_vvvv
, vbi
, pfx
, delta
, "vpminsd", Iop_Min32Sx8
);
29220 goto decode_success
;
29225 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
29226 /* VPMINUW = VEX.NDS.128.66.0F38.WIG 3A /r */
29227 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29228 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29229 uses_vvvv
, vbi
, pfx
, delta
, "vpminuw", Iop_Min16Ux8
);
29230 goto decode_success
;
29232 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
29233 /* VPMINUW = VEX.NDS.256.66.0F38.WIG 3A /r */
29234 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29235 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29236 uses_vvvv
, vbi
, pfx
, delta
, "vpminuw", Iop_Min16Ux16
);
29237 goto decode_success
;
      /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
      /* VPMINUD = VEX.NDS.128.66.0F38.WIG 3B /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux4 );
         goto decode_success;
      }
      /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
      /* VPMINUD = VEX.NDS.256.66.0F38.WIG 3B /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux8 );
         goto decode_success;
      }
      break;

   case 0x3C:
      /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
      /* VPMAXSB = VEX.NDS.128.66.0F38.WIG 3C /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx16 );
         goto decode_success;
      }
      /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
      /* VPMAXSB = VEX.NDS.256.66.0F38.WIG 3C /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx32 );
         goto decode_success;
      }
      break;

   case 0x3D:
      /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
      /* VPMAXSD = VEX.NDS.128.66.0F38.WIG 3D /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx4 );
         goto decode_success;
      }
      /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
      /* VPMAXSD = VEX.NDS.256.66.0F38.WIG 3D /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx8 );
         goto decode_success;
      }
      break;

   case 0x3E:
      /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
      /* VPMAXUW = VEX.NDS.128.66.0F38.WIG 3E /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux8 );
         goto decode_success;
      }
      /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
      /* VPMAXUW = VEX.NDS.256.66.0F38.WIG 3E /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux16 );
         goto decode_success;
      }
      break;

   case 0x3F:
      /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
      /* VPMAXUD = VEX.NDS.128.66.0F38.WIG 3F /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux4 );
         goto decode_success;
      }
      /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
      /* VPMAXUD = VEX.NDS.256.66.0F38.WIG 3F /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux8 );
         goto decode_success;
      }
      break;

   case 0x40:
      /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
      /* VPMULLD = VEX.NDS.128.66.0F38.WIG 40 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x4 );
         goto decode_success;
      }
      /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
      /* VPMULLD = VEX.NDS.256.66.0F38.WIG 40 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x8 );
         goto decode_success;
      }
      break;

   case 0x41:
      /* VPHMINPOSUW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 41 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PHMINPOSUW_128( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      break;
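
   /* The next three cases (0x45..0x47) are the per-element variable
      shifts.  Each hands off to dis_AVX_var_shiftV_byE, passing the
      base shift IROp and a Bool that says whether the 256-bit
      (VEX.L=1) form was selected. */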
   case 0x45:
      /* VPSRLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 45 /r */
      /* VPSRLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 45 /r */
      if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
         delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvd",
                                         Iop_Shr32, 1==getVexL(pfx) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VPSRLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 45 /r */
      /* VPSRLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 45 /r */
      if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
         delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvq",
                                         Iop_Shr64, 1==getVexL(pfx) );
         *uses_vvvv = True;
         goto decode_success;
      }
      break;

   case 0x46:
      /* VPSRAVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 46 /r */
      /* VPSRAVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 46 /r */
      if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
         delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsravd",
                                         Iop_Sar32, 1==getVexL(pfx) );
         *uses_vvvv = True;
         goto decode_success;
      }
      break;

   case 0x47:
      /* VPSLLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 47 /r */
      /* VPSLLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 47 /r */
      if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
         delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvd",
                                         Iop_Shl32, 1==getVexL(pfx) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VPSLLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 47 /r */
      /* VPSLLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 47 /r */
      if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
         delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvq",
                                         Iop_Shl64, 1==getVexL(pfx) );
         *uses_vvvv = True;
         goto decode_success;
      }
      break;
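
   /* Cases 0x58, 0x59, 0x78 and 0x79 below are the element
      broadcasts.  The source lane is replicated by repeated pairwise
      concatenation with itself (8HLto16 -> 16HLto32 -> 32HLto64) and
      the resulting 64-bit value is then spread across the vector with
      Iop_64HLtoV128 (xmm) or Iop_64x4toV256 (ymm).  Case 0x5A
      (VBROADCASTI128) instead duplicates a 128-bit load into both
      halves of the ymm destination. */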
   case 0x58:
      /* VPBROADCASTD xmm2/m32, xmm1 = VEX.128.66.0F38.W0 58 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0==getRexW(pfx)/*W0*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         IRTemp t32  = newTemp(Ity_I32);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta++;
            DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
            assign(t32, getXMMRegLane32(rE, 0));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            DIP("vpbroadcastd %s,%s\n", dis_buf, nameXMMReg(rG));
            assign(t32, loadLE(Ity_I32, mkexpr(addr)));
         }
         IRTemp t64 = newTemp(Ity_I64);
         assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
         IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
         putYMMRegLoAndZU(rG, res);
         goto decode_success;
      }
      /* VPBROADCASTD xmm2/m32, ymm1 = VEX.256.66.0F38.W0 58 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0==getRexW(pfx)/*W0*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         IRTemp t32  = newTemp(Ity_I32);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta++;
            DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
            assign(t32, getXMMRegLane32(rE, 0));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            DIP("vpbroadcastd %s,%s\n", dis_buf, nameYMMReg(rG));
            assign(t32, loadLE(Ity_I32, mkexpr(addr)));
         }
         IRTemp t64 = newTemp(Ity_I64);
         assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
         IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
                                  mkexpr(t64), mkexpr(t64));
         putYMMReg(rG, res);
         goto decode_success;
      }
      break;

   case 0x59:
      /* VPBROADCASTQ xmm2/m64, xmm1 = VEX.128.66.0F38.W0 59 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0==getRexW(pfx)/*W0*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         IRTemp t64  = newTemp(Ity_I64);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta++;
            DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
            assign(t64, getXMMRegLane64(rE, 0));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            DIP("vpbroadcastq %s,%s\n", dis_buf, nameXMMReg(rG));
            assign(t64, loadLE(Ity_I64, mkexpr(addr)));
         }
         IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
         putYMMRegLoAndZU(rG, res);
         goto decode_success;
      }
      /* VPBROADCASTQ xmm2/m64, ymm1 = VEX.256.66.0F38.W0 59 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0==getRexW(pfx)/*W0*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         IRTemp t64  = newTemp(Ity_I64);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta++;
            DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
            assign(t64, getXMMRegLane64(rE, 0));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            DIP("vpbroadcastq %s,%s\n", dis_buf, nameYMMReg(rG));
            assign(t64, loadLE(Ity_I64, mkexpr(addr)));
         }
         IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
                                  mkexpr(t64), mkexpr(t64));
         putYMMReg(rG, res);
         goto decode_success;
      }
      break;

   case 0x5A:
      /* VBROADCASTI128 m128, ymm1 = VEX.256.66.0F38.WIG 5A /r */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/
          && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         DIP("vbroadcasti128 %s,%s\n", dis_buf, nameYMMReg(rG));
         IRTemp t128 = newTemp(Ity_V128);
         assign(t128, loadLE(Ity_V128, mkexpr(addr)));
         putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
         goto decode_success;
      }
      break;

   case 0x78:
      /* VPBROADCASTB xmm2/m8, xmm1 = VEX.128.66.0F38.W0 78 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0==getRexW(pfx)/*W0*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         IRTemp t8   = newTemp(Ity_I8);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta++;
            DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
            assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            DIP("vpbroadcastb %s,%s\n", dis_buf, nameXMMReg(rG));
            assign(t8, loadLE(Ity_I8, mkexpr(addr)));
         }
         IRTemp t16 = newTemp(Ity_I16);
         assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
         IRTemp t32 = newTemp(Ity_I32);
         assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
         IRTemp t64 = newTemp(Ity_I64);
         assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
         IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
         putYMMRegLoAndZU(rG, res);
         goto decode_success;
      }
      /* VPBROADCASTB xmm2/m8, ymm1 = VEX.256.66.0F38.W0 78 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0==getRexW(pfx)/*W0*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         IRTemp t8   = newTemp(Ity_I8);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta++;
            DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
            assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            DIP("vpbroadcastb %s,%s\n", dis_buf, nameYMMReg(rG));
            assign(t8, loadLE(Ity_I8, mkexpr(addr)));
         }
         IRTemp t16 = newTemp(Ity_I16);
         assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
         IRTemp t32 = newTemp(Ity_I32);
         assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
         IRTemp t64 = newTemp(Ity_I64);
         assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
         IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
                                  mkexpr(t64), mkexpr(t64));
         putYMMReg(rG, res);
         goto decode_success;
      }
      break;

   case 0x79:
      /* VPBROADCASTW xmm2/m16, xmm1 = VEX.128.66.0F38.W0 79 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0==getRexW(pfx)/*W0*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         IRTemp t16  = newTemp(Ity_I16);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta++;
            DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
            assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            DIP("vpbroadcastw %s,%s\n", dis_buf, nameXMMReg(rG));
            assign(t16, loadLE(Ity_I16, mkexpr(addr)));
         }
         IRTemp t32 = newTemp(Ity_I32);
         assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
         IRTemp t64 = newTemp(Ity_I64);
         assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
         IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
         putYMMRegLoAndZU(rG, res);
         goto decode_success;
      }
      /* VPBROADCASTW xmm2/m16, ymm1 = VEX.256.66.0F38.W0 79 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0==getRexW(pfx)/*W0*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         IRTemp t16  = newTemp(Ity_I16);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta++;
            DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
            assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            DIP("vpbroadcastw %s,%s\n", dis_buf, nameYMMReg(rG));
            assign(t16, loadLE(Ity_I16, mkexpr(addr)));
         }
         IRTemp t32 = newTemp(Ity_I32);
         assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
         IRTemp t64 = newTemp(Ity_I64);
         assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
         IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
                                  mkexpr(t64), mkexpr(t64));
         putYMMReg(rG, res);
         goto decode_success;
      }
      break;
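
   /* Cases 0x8C and 0x8E are the masked vector loads and stores.
      Both directions are handled by dis_VMASKMOV, parameterised by
      the vector width (isYMM), the element type (Ity_I32 or Ity_I64)
      and the transfer direction (isLoad). */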
   case 0x8C:
      /* VPMASKMOVD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 8C /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
                               /*!isYMM*/False, Ity_I32, /*isLoad*/True );
         goto decode_success;
      }
      /* VPMASKMOVD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 8C /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
                               /*isYMM*/True, Ity_I32, /*isLoad*/True );
         goto decode_success;
      }
      /* VPMASKMOVQ m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 8C /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
                               /*!isYMM*/False, Ity_I64, /*isLoad*/True );
         goto decode_success;
      }
      /* VPMASKMOVQ m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 8C /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
                               /*isYMM*/True, Ity_I64, /*isLoad*/True );
         goto decode_success;
      }
      break;

   case 0x8E:
      /* VPMASKMOVD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 8E /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
                               /*!isYMM*/False, Ity_I32, /*!isLoad*/False );
         goto decode_success;
      }
      /* VPMASKMOVD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 8E /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
                               /*isYMM*/True, Ity_I32, /*!isLoad*/False );
         goto decode_success;
      }
      /* VPMASKMOVQ xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W1 8E /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
                               /*!isYMM*/False, Ity_I64, /*!isLoad*/False );
         goto decode_success;
      }
      /* VPMASKMOVQ ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W1 8E /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
                               /*isYMM*/True, Ity_I64, /*!isLoad*/False );
         goto decode_success;
      }
      break;
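
   /* For the gather cases (0x90..0x93), the decode is only accepted
      when dis_VGATHER has actually advanced delta; an unchanged delta
      means the encoding was rejected, so we fall through rather than
      jumping to decode_success. */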
   case 0x90:
      /* VPGATHERDD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 90 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd",
                              /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VPGATHERDD ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 90 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd",
                              /*isYMM*/True, /*!isVM64x*/False, Ity_I32 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VPGATHERDQ xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 90 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq",
                              /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VPGATHERDQ ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 90 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq",
                              /*isYMM*/True, /*!isVM64x*/False, Ity_I64 );
         if (delta != delta0)
            goto decode_success;
      }
      break;

   case 0x91:
      /* VPGATHERQD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 91 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd",
                              /*!isYMM*/False, /*isVM64x*/True, Ity_I32 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VPGATHERQD xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 91 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd",
                              /*isYMM*/True, /*isVM64x*/True, Ity_I32 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VPGATHERQQ xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 91 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq",
                              /*!isYMM*/False, /*isVM64x*/True, Ity_I64 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VPGATHERQQ ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 91 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq",
                              /*isYMM*/True, /*isVM64x*/True, Ity_I64 );
         if (delta != delta0)
            goto decode_success;
      }
      break;

   case 0x92:
      /* VGATHERDPS xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 92 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps",
                              /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VGATHERDPS ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 92 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps",
                              /*isYMM*/True, /*!isVM64x*/False, Ity_I32 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VGATHERDPD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 92 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd",
                              /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VGATHERDPD ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 92 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd",
                              /*isYMM*/True, /*!isVM64x*/False, Ity_I64 );
         if (delta != delta0)
            goto decode_success;
      }
      break;

   case 0x93:
      /* VGATHERQPS xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 93 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps",
                              /*!isYMM*/False, /*isVM64x*/True, Ity_I32 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VGATHERQPS xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 93 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps",
                              /*isYMM*/True, /*isVM64x*/True, Ity_I32 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VGATHERQPD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 93 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd",
                              /*!isYMM*/False, /*isVM64x*/True, Ity_I64 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VGATHERQPD ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 93 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd",
                              /*isYMM*/True, /*isVM64x*/True, Ity_I64 );
         if (delta != delta0)
            goto decode_success;
      }
      break;
   case 0x96 ... 0x9F:
   case 0xA6 ... 0xAF:
   case 0xB6 ... 0xBF:
      /* VFMADDSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 96 /r */
      /* VFMADDSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 96 /r */
      /* VFMADDSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 96 /r */
      /* VFMADDSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 96 /r */
      /* VFMSUBADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 97 /r */
      /* VFMSUBADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 97 /r */
      /* VFMSUBADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 97 /r */
      /* VFMSUBADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 97 /r */
      /* VFMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 98 /r */
      /* VFMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 98 /r */
      /* VFMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 98 /r */
      /* VFMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 98 /r */
      /* VFMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 99 /r */
      /* VFMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 99 /r */
      /* VFMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9A /r */
      /* VFMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9A /r */
      /* VFMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9A /r */
      /* VFMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9A /r */
      /* VFMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9B /r */
      /* VFMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9B /r */
      /* VFNMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9C /r */
      /* VFNMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9C /r */
      /* VFNMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9C /r */
      /* VFNMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9C /r */
      /* VFNMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9D /r */
      /* VFNMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9D /r */
      /* VFNMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9E /r */
      /* VFNMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9E /r */
      /* VFNMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9E /r */
      /* VFNMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9E /r */
      /* VFNMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9F /r */
      /* VFNMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9F /r */
      /* VFMADDSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A6 /r */
      /* VFMADDSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A6 /r */
      /* VFMADDSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A6 /r */
      /* VFMADDSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A6 /r */
      /* VFMSUBADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A7 /r */
      /* VFMSUBADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A7 /r */
      /* VFMSUBADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A7 /r */
      /* VFMSUBADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A7 /r */
      /* VFMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A8 /r */
      /* VFMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A8 /r */
      /* VFMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A8 /r */
      /* VFMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A8 /r */
      /* VFMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 A9 /r */
      /* VFMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 A9 /r */
      /* VFMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AA /r */
      /* VFMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AA /r */
      /* VFMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AA /r */
      /* VFMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AA /r */
      /* VFMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AB /r */
      /* VFMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AB /r */
      /* VFNMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AC /r */
      /* VFNMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AC /r */
      /* VFNMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AC /r */
      /* VFNMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AC /r */
      /* VFNMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AD /r */
      /* VFNMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AD /r */
      /* VFNMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AE /r */
      /* VFNMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AE /r */
      /* VFNMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AE /r */
      /* VFNMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AE /r */
      /* VFNMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AF /r */
      /* VFNMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AF /r */
      /* VFMADDSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B6 /r */
      /* VFMADDSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B6 /r */
      /* VFMADDSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B6 /r */
      /* VFMADDSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B6 /r */
      /* VFMSUBADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B7 /r */
      /* VFMSUBADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B7 /r */
      /* VFMSUBADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B7 /r */
      /* VFMSUBADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B7 /r */
      /* VFMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B8 /r */
      /* VFMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B8 /r */
      /* VFMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B8 /r */
      /* VFMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B8 /r */
      /* VFMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 B9 /r */
      /* VFMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 B9 /r */
      /* VFMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BA /r */
      /* VFMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BA /r */
      /* VFMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BA /r */
      /* VFMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BA /r */
      /* VFMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BB /r */
      /* VFMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BB /r */
      /* VFNMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BC /r */
      /* VFNMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BC /r */
      /* VFNMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BC /r */
      /* VFNMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BC /r */
      /* VFNMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BD /r */
      /* VFNMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BD /r */
      /* VFNMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BE /r */
      /* VFNMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BE /r */
      /* VFNMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BE /r */
      /* VFNMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BE /r */
      /* VFNMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BF /r */
      /* VFNMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BF /r */
      if (have66noF2noF3(pfx)) {
         delta = dis_FMA( vbi, pfx, delta, opc );
         *uses_vvvv = True;
         dres->hint = Dis_HintVerbose;
         goto decode_success;
      }
      break;
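
   /* Cases 0xDB..0xDF are the VEX-encoded AES operations.  All of
      them go through dis_AESx; VAESIMC (0xDB) is the only unary one,
      so it is also the only one that does not use the vvvv field. */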
   case 0xDB:
   case 0xDC:
   case 0xDD:
   case 0xDE:
   case 0xDF:
      /* VAESIMC xmm2/m128, xmm1 = VEX.128.66.0F38.WIG DB /r */
      /* VAESENC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DC /r */
      /* VAESENCLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DD /r */
      /* VAESDEC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DE /r */
      /* VAESDECLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DF /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AESx( vbi, pfx, delta, True/*isAvx*/, opc );
         if (opc != 0xDB) *uses_vvvv = True;
         goto decode_success;
      }
      break;
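
   /* The remaining cases (0xF2..0xF7) are BMI1/BMI2 integer
      instructions encoded in VEX space.  Each check insists on
      VEX.L=0 ("LZ") and on there being no REX prefix, and takes the
      operand size (4 or 8 bytes) from VEX.W. */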
   case 0xF2:
      /* ANDN r/m32, r32b, r32a = VEX.NDS.LZ.0F38.W0 F2 /r */
      /* ANDN r/m64, r64b, r64a = VEX.NDS.LZ.0F38.W1 F2 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
         Int     size = getRexW(pfx) ? 8 : 4;
         IRType  ty   = szToITy(size);
         IRTemp  dst  = newTemp(ty);
         IRTemp  src1 = newTemp(ty);
         IRTemp  src2 = newTemp(ty);
         UChar   rm   = getUChar(delta);

         assign( src1, getIRegV(size,pfx) );
         if (epartIsReg(rm)) {
            assign( src2, getIRegE(size,pfx,rm) );
            DIP("andn %s,%s,%s\n", nameIRegE(size,pfx,rm),
                nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
            delta++;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( src2, loadLE(ty, mkexpr(addr)) );
            DIP("andn %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
                nameIRegG(size,pfx,rm));
            delta += alen;
         }

         assign( dst, binop( mkSizedOp(ty,Iop_And8),
                             unop( mkSizedOp(ty,Iop_Not8), mkexpr(src1) ),
                             mkexpr(src2) ) );
         putIRegG( size, pfx, rm, mkexpr(dst) );
         stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(size == 8
                                               ? AMD64G_CC_OP_ANDN64
                                               : AMD64G_CC_OP_ANDN32)) );
         stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
         *uses_vvvv = True;
         goto decode_success;
      }
      break;
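
   /* BLSI, BLSMSK and BLSR all live at opcode 0xF3 and are told apart
      by the reg field of the modrm byte (/3, /2 and /1 respectively).
      Each combines (0 - src) or (src - 1) with src and records the
      result and source in the flags thunk. */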
   case 0xF3:
      /* BLSI r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /3 */
      /* BLSI r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /3 */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
          && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 3) {
         Int     size = getRexW(pfx) ? 8 : 4;
         IRType  ty   = szToITy(size);
         IRTemp  src  = newTemp(ty);
         IRTemp  dst  = newTemp(ty);
         UChar   rm   = getUChar(delta);

         if (epartIsReg(rm)) {
            assign( src, getIRegE(size,pfx,rm) );
            DIP("blsi %s,%s\n", nameIRegE(size,pfx,rm),
                nameIRegV(size,pfx));
            delta++;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( src, loadLE(ty, mkexpr(addr)) );
            DIP("blsi %s,%s\n", dis_buf, nameIRegV(size,pfx));
            delta += alen;
         }

         assign( dst, binop(mkSizedOp(ty,Iop_And8),
                            binop(mkSizedOp(ty,Iop_Sub8), mkU(ty, 0),
                                  mkexpr(src)), mkexpr(src)) );
         putIRegV( size, pfx, mkexpr(dst) );
         stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(size == 8
                                               ? AMD64G_CC_OP_BLSI64
                                               : AMD64G_CC_OP_BLSI32)) );
         stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
         stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* BLSMSK r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /2 */
      /* BLSMSK r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /2 */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
          && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 2) {
         Int     size = getRexW(pfx) ? 8 : 4;
         IRType  ty   = szToITy(size);
         IRTemp  src  = newTemp(ty);
         IRTemp  dst  = newTemp(ty);
         UChar   rm   = getUChar(delta);

         if (epartIsReg(rm)) {
            assign( src, getIRegE(size,pfx,rm) );
            DIP("blsmsk %s,%s\n", nameIRegE(size,pfx,rm),
                nameIRegV(size,pfx));
            delta++;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( src, loadLE(ty, mkexpr(addr)) );
            DIP("blsmsk %s,%s\n", dis_buf, nameIRegV(size,pfx));
            delta += alen;
         }

         assign( dst, binop(mkSizedOp(ty,Iop_Xor8),
                            binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src),
                                  mkU(ty, 1)), mkexpr(src)) );
         putIRegV( size, pfx, mkexpr(dst) );
         stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(size == 8
                                               ? AMD64G_CC_OP_BLSMSK64
                                               : AMD64G_CC_OP_BLSMSK32)) );
         stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
         stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* BLSR r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /1 */
      /* BLSR r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /1 */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
          && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 1) {
         Int     size = getRexW(pfx) ? 8 : 4;
         IRType  ty   = szToITy(size);
         IRTemp  src  = newTemp(ty);
         IRTemp  dst  = newTemp(ty);
         UChar   rm   = getUChar(delta);

         if (epartIsReg(rm)) {
            assign( src, getIRegE(size,pfx,rm) );
            DIP("blsr %s,%s\n", nameIRegE(size,pfx,rm),
                nameIRegV(size,pfx));
            delta++;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( src, loadLE(ty, mkexpr(addr)) );
            DIP("blsr %s,%s\n", dis_buf, nameIRegV(size,pfx));
            delta += alen;
         }

         assign( dst, binop(mkSizedOp(ty,Iop_And8),
                            binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src),
                                  mkU(ty, 1)), mkexpr(src)) );
         putIRegV( size, pfx, mkexpr(dst) );
         stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(size == 8
                                               ? AMD64G_CC_OP_BLSR64
                                               : AMD64G_CC_OP_BLSR32)) );
         stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
         stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
         *uses_vvvv = True;
         goto decode_success;
      }
      break;
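
   /* BZHI clears all bits of src1 at positions start and above, where
      start is the low byte of the vvvv operand.  The (start < opsize)
      condition computed below is stashed in CC_DEP2 for the flags
      helper. */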
   case 0xF5:
      /* BZHI r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F5 /r */
      /* BZHI r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F5 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
         Int     size  = getRexW(pfx) ? 8 : 4;
         IRType  ty    = szToITy(size);
         IRTemp  dst   = newTemp(ty);
         IRTemp  src1  = newTemp(ty);
         IRTemp  src2  = newTemp(ty);
         IRTemp  start = newTemp(Ity_I8);
         IRTemp  cond  = newTemp(Ity_I1);
         UChar   rm    = getUChar(delta);

         assign( src2, getIRegV(size,pfx) );
         if (epartIsReg(rm)) {
            assign( src1, getIRegE(size,pfx,rm) );
            DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx),
                nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
            delta++;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( src1, loadLE(ty, mkexpr(addr)) );
            DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx), dis_buf,
                nameIRegG(size,pfx,rm));
            delta += alen;
         }

         assign( start, narrowTo( Ity_I8, mkexpr(src2) ) );
         assign( cond, binop(Iop_CmpLT32U,
                             unop(Iop_8Uto32, mkexpr(start)),
                             mkU32(8*size)) );
         /* if (start < opsize) {
               if (start == 0)
                  dst = 0;
               else
                  dst = (src1 << (opsize-start)) u>> (opsize-start);
            } else {
               dst = src1;
            } */
         assign( dst,
                 IRExpr_ITE(
                    mkexpr(cond),
                    IRExpr_ITE(
                       binop(Iop_CmpEQ8, mkexpr(start), mkU8(0)),
                       mkU(ty, 0),
                       binop(
                          mkSizedOp(ty,Iop_Shr8),
                          binop(
                             mkSizedOp(ty,Iop_Shl8),
                             mkexpr(src1),
                             binop(Iop_Sub8, mkU8(8*size), mkexpr(start))
                          ),
                          binop(Iop_Sub8, mkU8(8*size), mkexpr(start))
                       )
                    ),
                    mkexpr(src1)
                 )
         );
         putIRegG( size, pfx, rm, mkexpr(dst) );
         stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(size == 8
                                               ? AMD64G_CC_OP_BLSR64
                                               : AMD64G_CC_OP_BLSR32)) );
         stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
         stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(cond))) );
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* PDEP r/m32, r32b, r32a = VEX.NDS.LZ.F2.0F38.W0 F5 /r */
      /* PDEP r/m64, r64b, r64a = VEX.NDS.LZ.F2.0F38.W1 F5 /r */
      if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
         Int     size = getRexW(pfx) ? 8 : 4;
         IRType  ty   = szToITy(size);
         IRTemp  src  = newTemp(ty);
         IRTemp  mask = newTemp(ty);
         UChar   rm   = getUChar(delta);

         assign( src, getIRegV(size,pfx) );
         if (epartIsReg(rm)) {
            assign( mask, getIRegE(size,pfx,rm) );
            DIP("pdep %s,%s,%s\n", nameIRegE(size,pfx,rm),
                nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
            delta++;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( mask, loadLE(ty, mkexpr(addr)) );
            DIP("pdep %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
                nameIRegG(size,pfx,rm));
            delta += alen;
         }

         IRExpr** args = mkIRExprVec_2( widenUto64(mkexpr(src)),
                                        widenUto64(mkexpr(mask)) );
         putIRegG( size, pfx, rm,
                   narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/,
                                              "amd64g_calculate_pdep",
                                              &amd64g_calculate_pdep, args)) );
         *uses_vvvv = True;
         /* Flags aren't modified.  */
         goto decode_success;
      }
      /* PEXT r/m32, r32b, r32a = VEX.NDS.LZ.F3.0F38.W0 F5 /r */
      /* PEXT r/m64, r64b, r64a = VEX.NDS.LZ.F3.0F38.W1 F5 /r */
      if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
         Int     size = getRexW(pfx) ? 8 : 4;
         IRType  ty   = szToITy(size);
         IRTemp  src  = newTemp(ty);
         IRTemp  mask = newTemp(ty);
         UChar   rm   = getUChar(delta);

         assign( src, getIRegV(size,pfx) );
         if (epartIsReg(rm)) {
            assign( mask, getIRegE(size,pfx,rm) );
            DIP("pext %s,%s,%s\n", nameIRegE(size,pfx,rm),
                nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
            delta++;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( mask, loadLE(ty, mkexpr(addr)) );
            DIP("pext %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
                nameIRegG(size,pfx,rm));
            delta += alen;
         }

         /* First mask off bits not set in mask, they are ignored
            and it should be fine if they contain undefined values. */
         IRExpr* masked = binop(mkSizedOp(ty,Iop_And8),
                                mkexpr(src), mkexpr(mask));
         IRExpr** args = mkIRExprVec_2( widenUto64(masked),
                                        widenUto64(mkexpr(mask)) );
         putIRegG( size, pfx, rm,
                   narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/,
                                              "amd64g_calculate_pext",
                                              &amd64g_calculate_pext, args)) );
         *uses_vvvv = True;
         /* Flags aren't modified.  */
         goto decode_success;
      }
      break;
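
   /* MULX is an unsigned widening multiply of RDX/EDX by the r/m
      operand: the low half of the product goes to the vvvv register
      and the high half to the modrm reg register.  No flags are
      written. */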
   case 0xF6:
      /* MULX r/m32, r32b, r32a = VEX.NDD.LZ.F2.0F38.W0 F6 /r */
      /* MULX r/m64, r64b, r64a = VEX.NDD.LZ.F2.0F38.W1 F6 /r */
      if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
         Int     size = getRexW(pfx) ? 8 : 4;
         IRType  ty   = szToITy(size);
         IRTemp  src1 = newTemp(ty);
         IRTemp  src2 = newTemp(ty);
         IRTemp  res  = newTemp(size == 8 ? Ity_I128 : Ity_I64);
         UChar   rm   = getUChar(delta);

         assign( src1, getIRegRDX(size) );
         if (epartIsReg(rm)) {
            assign( src2, getIRegE(size,pfx,rm) );
            DIP("mulx %s,%s,%s\n", nameIRegE(size,pfx,rm),
                nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
            delta++;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( src2, loadLE(ty, mkexpr(addr)) );
            DIP("mulx %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
                nameIRegG(size,pfx,rm));
            delta += alen;
         }

         assign( res, binop(size == 8 ? Iop_MullU64 : Iop_MullU32,
                            mkexpr(src1), mkexpr(src2)) );
         putIRegV( size, pfx,
                   unop(size == 8 ? Iop_128to64 : Iop_64to32, mkexpr(res)) );
         putIRegG( size, pfx, rm,
                   unop(size == 8 ? Iop_128HIto64 : Iop_64HIto32,
                        mkexpr(res)) );
         *uses_vvvv = True;
         /* Flags aren't modified.  */
         goto decode_success;
      }
      break;
   case 0xF7:
      /* SARX r32b, r/m32, r32a = VEX.NDS.LZ.F3.0F38.W0 F7 /r */
      /* SARX r64b, r/m64, r64a = VEX.NDS.LZ.F3.0F38.W1 F7 /r */
      if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
         delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "sarx", Iop_Sar8 );
         goto decode_success;
      }
      /* SHLX r32b, r/m32, r32a = VEX.NDS.LZ.66.0F38.W0 F7 /r */
      /* SHLX r64b, r/m64, r64a = VEX.NDS.LZ.66.0F38.W1 F7 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
         delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shlx", Iop_Shl8 );
         goto decode_success;
      }
      /* SHRX r32b, r/m32, r32a = VEX.NDS.LZ.F2.0F38.W0 F7 /r */
      /* SHRX r64b, r/m64, r64a = VEX.NDS.LZ.F2.0F38.W1 F7 /r */
      if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
         delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shrx", Iop_Shr8 );
         goto decode_success;
      }
      /* BEXTR r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F7 /r */
      /* BEXTR r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F7 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
         Int     size  = getRexW(pfx) ? 8 : 4;
         IRType  ty    = szToITy(size);
         IRTemp  dst   = newTemp(ty);
         IRTemp  src1  = newTemp(ty);
         IRTemp  src2  = newTemp(ty);
         IRTemp  stle  = newTemp(Ity_I16);
         IRTemp  start = newTemp(Ity_I8);
         IRTemp  len   = newTemp(Ity_I8);
         UChar   rm    = getUChar(delta);

         assign( src2, getIRegV(size,pfx) );
         if (epartIsReg(rm)) {
            assign( src1, getIRegE(size,pfx,rm) );
            DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx),
                nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
            delta++;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( src1, loadLE(ty, mkexpr(addr)) );
            DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx), dis_buf,
                nameIRegG(size,pfx,rm));
            delta += alen;
         }

         assign( stle, narrowTo( Ity_I16, mkexpr(src2) ) );
         assign( start, unop( Iop_16to8, mkexpr(stle) ) );
         assign( len, unop( Iop_16HIto8, mkexpr(stle) ) );
         /* if (start+len < opsize) {
               if (len != 0)
                  dst = (src1 << (opsize-start-len)) u>> (opsize-len);
               else
                  dst = 0;
            } else {
               if (start < opsize)
                  dst = src1 u>> start;
               else
                  dst = 0;
            } */
         assign( dst,
                 IRExpr_ITE(
                    binop(Iop_CmpLT32U,
                          binop(Iop_Add32,
                                unop(Iop_8Uto32, mkexpr(start)),
                                unop(Iop_8Uto32, mkexpr(len))),
                          mkU32(8*size)),
                    IRExpr_ITE(
                       binop(Iop_CmpEQ8, mkexpr(len), mkU8(0)),
                       mkU(ty, 0),
                       binop(mkSizedOp(ty,Iop_Shr8),
                             binop(mkSizedOp(ty,Iop_Shl8), mkexpr(src1),
                                   binop(Iop_Sub8,
                                         binop(Iop_Sub8, mkU8(8*size),
                                               mkexpr(start)),
                                         mkexpr(len))),
                             binop(Iop_Sub8, mkU8(8*size),
                                   mkexpr(len)))
                    ),
                    IRExpr_ITE(
                       binop(Iop_CmpLT32U,
                             unop(Iop_8Uto32, mkexpr(start)),
                             mkU32(8*size)),
                       binop(mkSizedOp(ty,Iop_Shr8), mkexpr(src1),
                             mkexpr(start)),
                       mkU(ty, 0)
                    )
                 )
         );
         putIRegG( size, pfx, rm, mkexpr(dst) );
         stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(size == 8
                                               ? AMD64G_CC_OP_ANDN64
                                               : AMD64G_CC_OP_ANDN32)) );
         stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
         *uses_vvvv = True;
         goto decode_success;
      }
      break;

   default:
      break;

   }

  //decode_failure:
   return deltaIN;

  decode_success:
   return delta;
}
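

/* Helper for the FMA4 decoder below.  Fills v[0..3] with the operands
   of a 4-operand FMA4 insn: the destination register, the vvvv
   register, the register named in the top nibble of the trailing
   immediate, and the r/m operand; 'swap' selects the operand order
   implied by VEX.W. */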
static Long decode_vregW(Int count, Long delta, UChar modrm, Prefix pfx,
                         const VexAbiInfo* vbi, IRTemp *v, UInt *dst, Int swap)
{
   v[0] = newTemp(Ity_V128);
   v[1] = newTemp(Ity_V128);
   v[2] = newTemp(Ity_V128);
   v[3] = newTemp(Ity_V128);
   IRTemp addr = IRTemp_INVALID;
   Int    alen = 0;
   HChar  dis_buf[50];

   *dst = gregOfRexRM(pfx, modrm);
   assign( v[0], getXMMReg(*dst) );

   if ( epartIsReg( modrm ) ) {
      UInt ereg = eregOfRexRM(pfx, modrm);
      assign(swap ? v[count-1] : v[count-2], getXMMReg(ereg) );
      DIS(dis_buf, "%s", nameXMMReg(ereg));
   } else {
      Bool extra_byte = (getUChar(delta - 3) & 0xF) != 9;
      addr = disAMode(&alen, vbi, pfx, delta, dis_buf, extra_byte);
      assign(swap ? v[count-1] : v[count-2], loadLE(Ity_V128, mkexpr(addr)));
      delta += alen - 1;
   }

   UInt vvvv = getVexNvvvv(pfx);
   switch (count) {
      case 2:
         DIP( "%s,%s", nameXMMReg(*dst), dis_buf );
         break;
      case 3:
         assign( swap ? v[1] : v[2], getXMMReg(vvvv) );
         DIP( "%s,%s,%s", nameXMMReg(*dst), nameXMMReg(vvvv), dis_buf );
         break;
      case 4:
         {
            assign( v[1], getXMMReg(vvvv) );
            UInt src2 = getUChar(delta + 1) >> 4;
            assign( swap ? v[2] : v[3], getXMMReg(src2) );
            DIP( "%s,%s,%s,%s", nameXMMReg(*dst), nameXMMReg(vvvv),
                 nameXMMReg(src2), dis_buf );
         }
         break;
   }
   return delta + 1;
}
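
/* dis_FMA4 expands each FMA4 operation lane by lane into
   IRExpr_Qop(Iop_MAddF32/Iop_MAddF64, ...), negating the first or
   third operand for the negated and subtracting variants;
   get_FAKE_roundingmode() supplies the rounding mode. */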
static Long dis_FMA4 (Prefix pfx, Long delta, UChar opc,
                      Bool* uses_vvvv, const VexAbiInfo* vbi )
{
   UInt dst;
   *uses_vvvv = True;

   UChar modrm = getUChar(delta);

   Bool zero_64F = False;
   Bool zero_96F = False;
   UInt is_F32   = ((opc & 0x01) == 0x00) ? 1 : 0;
   Bool neg      = (opc & 0xF0) == 0x70;
   Bool alt      = (opc & 0xF0) == 0x50;
   Bool sub      = alt ? (opc & 0x0E) != 0x0E : (opc & 0x0C) == 0x0C;
   IRTemp operand[4];
   IRExpr *src[3];
   switch (opc & 0xF) {
      case 0x0A: zero_96F = (opc >> 4) != 0x05; break;
      case 0x0B: zero_64F = (opc >> 4) != 0x05; break;
      case 0x0E: zero_96F = (opc >> 4) != 0x05; break;
      case 0x0F: zero_64F = (opc >> 4) != 0x05; break;
      default: break;
   }
   DIP("vfm%s", neg ? "n" : "");
   if (alt) DIP("%s", sub ? "add" : "sub");
   DIP("%s", sub ? "sub" : "add");
   DIP("%c ", (zero_64F || zero_96F) ? 's' : 'p');
   DIP("%c ", is_F32 ? 's' : 'd');
   delta = decode_vregW(4, delta, modrm, pfx, vbi, operand, &dst, getRexW(pfx));
   DIP("\n");

   void (*putXMM[2])(UInt,Int,IRExpr*) = {&putXMMRegLane64F, &putXMMRegLane32F};

   IROp size_op[] = {Iop_V128to64, Iop_V128HIto64, Iop_64to32, Iop_64HIto32};
   IROp neg_op[]  = {Iop_NegF64, Iop_NegF32};
   Int i, j;
   for (i = 0; i < is_F32 * 2 + 2; i++) {
      for (j = 0; j < 3; j++) {
         if (is_F32) {
            src[j] = unop(Iop_ReinterpI32asF32,
                          unop(size_op[i%2+2],
                               unop(size_op[i/2],
                                    mkexpr(operand[j + 1])
                               )
                          ));
         } else {
            src[j] = unop(Iop_ReinterpI64asF64,
                          unop(size_op[i%2],
                               mkexpr(operand[j + 1])
                          ));
         }
      }
      putXMM[is_F32](dst, i, IRExpr_Qop(is_F32 ? Iop_MAddF32 : Iop_MAddF64,
                                        get_FAKE_roundingmode(),
                                        neg ? unop(neg_op[is_F32], src[0])
                                            : src[0],
                                        src[1],
                                        sub ? unop(neg_op[is_F32], src[2])
                                            : src[2]
                                       ));
   }

   /* Zero out top bits of ymm/xmm register. */
   putYMMRegLane128( dst, 1, mkV128(0) );

   if (zero_64F || zero_96F) {
      putXMMRegLane64( dst, 1, IRExpr_Const(IRConst_U64(0)));
   }

   if (zero_96F) {
      putXMMRegLane32( dst, 1, IRExpr_Const(IRConst_U32(0)));
   }

   return delta + 1;
}
/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level post-escape decoders: dis_ESC_0F3A__VEX    ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

static IRTemp math_VPERMILPS_128 ( IRTemp sV, UInt imm8 )
{
   vassert(imm8 < 256);
   IRTemp s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
#  define SEL(_nn) (((_nn)==0) ? s0 : ((_nn)==1) ? s1 \
                                    : ((_nn)==2) ? s2 : s3)
   IRTemp res = newTemp(Ity_V128);
   assign(res, mkV128from32s( SEL((imm8 >> 6) & 3),
                              SEL((imm8 >> 4) & 3),
                              SEL((imm8 >> 2) & 3),
                              SEL((imm8 >> 0) & 3) ));
#  undef SEL
   return res;
}
/* Handles 128 and 256 bit versions of VCVTPS2PH. */
static Long dis_VCVTPS2PH ( const VexAbiInfo* vbi, Prefix pfx,
                            Long delta, Bool is256bit )
{
   /* This is a width-halving store or reg-reg move, that does
      conversion on the transferred data. */
   UChar   modrm = getUChar(delta);
   UInt    rG    = gregOfRexRM(pfx, modrm);
   IRTemp  rm    = newTemp(Ity_I32);
   IROp    op    = is256bit ? Iop_F32toF16x8 : Iop_F32toF16x4;
   IRExpr* srcG  = (is256bit ? getYMMReg : getXMMReg)(rG);

   /* (imm & 3) contains an Intel-encoded rounding mode.  Because that
      encoding is the same as the encoding for IRRoundingMode, we can
      use that value directly in the IR as a rounding mode. */

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx, modrm);
      delta += 1;
      UInt imm = getUChar(delta);
      assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
      IRExpr* res = binop(op, mkexpr(rm), srcG);
      if (!is256bit)
         res = unop(Iop_64UtoV128, res);
      putYMMRegLoAndZU(rE, res);
      DIP("vcvtps2ph $%u,%s,%s\n",
          imm, (is256bit ? nameYMMReg : nameXMMReg)(rG), nameXMMReg(rE));
   } else {
      Int    alen = 0;
      HChar  dis_buf[50];
      IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      delta += alen;
      UInt imm = getUChar(delta);
      assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
      IRExpr* res = binop(op, mkexpr(rm), srcG);
      storeLE(mkexpr(addr), res);
      DIP("vcvtps2ph $%u,%s,%s\n",
          imm, (is256bit ? nameYMMReg : nameXMMReg)(rG), dis_buf);
   }
   delta++;
   /* doesn't use vvvv */
   return delta;
}
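
/* The 0F3A-escape handlers below decode instructions that carry a
   trailing ib immediate, so disAMode is called with its final
   argument set to 1, accounting for the imm8 byte that still follows
   the addressing mode. */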
__attribute__((noinline))
static
Long dis_ESC_0F3A__VEX (
        /*MB_OUT*/DisResult* dres,
        /*OUT*/   Bool*      uses_vvvv,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  vbi,
        Prefix pfx, Int sz, Long deltaIN
     )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
   *uses_vvvv = False;

   switch (opc) {

   case 0x00:
   case 0x01:
      /* VPERMQ imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 00 /r ib */
      /* VPERMPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 01 /r ib */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 1==getRexW(pfx)/*W1*/) {
         UChar  modrm = getUChar(delta);
         UInt   imm8  = 0;
         UInt   rG    = gregOfRexRM(pfx, modrm);
         IRTemp sV    = newTemp(Ity_V256);
         const HChar *name = opc == 0 ? "vpermq" : "vpermpd";
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta += 1;
            imm8 = getUChar(delta);
            DIP("%s $%u,%s,%s\n",
                name, imm8, nameYMMReg(rE), nameYMMReg(rG));
            assign(sV, getYMMReg(rE));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            delta += alen;
            imm8 = getUChar(delta);
            DIP("%s $%u,%s,%s\n",
                name, imm8, dis_buf, nameYMMReg(rG));
            assign(sV, loadLE(Ity_V256, mkexpr(addr)));
         }
         delta++;
         IRTemp s[4];
         s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
         breakupV256to64s(sV, &s[3], &s[2], &s[1], &s[0]);
         IRTemp dV = newTemp(Ity_V256);
         assign(dV, IRExpr_Qop(Iop_64x4toV256,
                               mkexpr(s[(imm8 >> 6) & 3]),
                               mkexpr(s[(imm8 >> 4) & 3]),
                               mkexpr(s[(imm8 >> 2) & 3]),
                               mkexpr(s[(imm8 >> 0) & 3])));
         putYMMReg(rG, mkexpr(dV));
         goto decode_success;
      }
      break;

   case 0x02:
      /* VPBLENDD imm8, xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 02 /r ib */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
         UChar  modrm = getUChar(delta);
         UInt   imm8  = 0;
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp sV    = newTemp(Ity_V128);
         IRTemp dV    = newTemp(Ity_V128);
         UInt   i;
         IRTemp s[4], d[4];
         assign(sV, getXMMReg(rV));
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta += 1;
            imm8 = getUChar(delta);
            DIP("vpblendd $%u,%s,%s,%s\n",
                imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
            assign(dV, getXMMReg(rE));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            delta += alen;
            imm8 = getUChar(delta);
            DIP("vpblendd $%u,%s,%s,%s\n",
                imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
            assign(dV, loadLE(Ity_V128, mkexpr(addr)));
         }
         delta++;
         for (i = 0; i < 4; i++) {
            s[i] = IRTemp_INVALID;
            d[i] = IRTemp_INVALID;
         }
         breakupV128to32s( sV, &s[3], &s[2], &s[1], &s[0] );
         breakupV128to32s( dV, &d[3], &d[2], &d[1], &d[0] );
         for (i = 0; i < 4; i++)
            putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i]));
         putYMMRegLane128(rG, 1, mkV128(0));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VPBLENDD imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 02 /r ib */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
         UChar  modrm = getUChar(delta);
         UInt   imm8  = 0;
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp sV    = newTemp(Ity_V256);
         IRTemp dV    = newTemp(Ity_V256);
         UInt   i;
         IRTemp s[8], d[8];
         assign(sV, getYMMReg(rV));
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta += 1;
            imm8 = getUChar(delta);
            DIP("vpblendd $%u,%s,%s,%s\n",
                imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
            assign(dV, getYMMReg(rE));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            delta += alen;
            imm8 = getUChar(delta);
            DIP("vpblendd $%u,%s,%s,%s\n",
                imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
            assign(dV, loadLE(Ity_V256, mkexpr(addr)));
         }
         delta++;
         for (i = 0; i < 8; i++) {
            s[i] = IRTemp_INVALID;
            d[i] = IRTemp_INVALID;
         }
         breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
                               &s[3], &s[2], &s[1], &s[0] );
         breakupV256to32s( dV, &d[7], &d[6], &d[5], &d[4],
                               &d[3], &d[2], &d[1], &d[0] );
         for (i = 0; i < 8; i++)
            putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i]));
         *uses_vvvv = True;
         goto decode_success;
      }
      break;

   case 0x04:
      /* VPERMILPS imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 04 /r ib */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar  modrm = getUChar(delta);
         UInt   imm8  = 0;
         UInt   rG    = gregOfRexRM(pfx, modrm);
         IRTemp sV    = newTemp(Ity_V256);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta += 1;
            imm8 = getUChar(delta);
            DIP("vpermilps $%u,%s,%s\n",
                imm8, nameYMMReg(rE), nameYMMReg(rG));
            assign(sV, getYMMReg(rE));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            delta += alen;
            imm8 = getUChar(delta);
            DIP("vpermilps $%u,%s,%s\n",
                imm8, dis_buf, nameYMMReg(rG));
            assign(sV, loadLE(Ity_V256, mkexpr(addr)));
         }
         delta++;
         IRTemp  sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
         breakupV256toV128s( sV, &sVhi, &sVlo );
         IRTemp  dVhi = math_VPERMILPS_128( sVhi, imm8 );
         IRTemp  dVlo = math_VPERMILPS_128( sVlo, imm8 );
         IRExpr* res  = binop(Iop_V128HLtoV256, mkexpr(dVhi), mkexpr(dVlo));
         putYMMReg(rG, res);
         goto decode_success;
      }
      /* VPERMILPS imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 04 /r ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar  modrm = getUChar(delta);
         UInt   imm8  = 0;
         UInt   rG    = gregOfRexRM(pfx, modrm);
         IRTemp sV    = newTemp(Ity_V128);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta += 1;
            imm8 = getUChar(delta);
            DIP("vpermilps $%u,%s,%s\n",
                imm8, nameXMMReg(rE), nameXMMReg(rG));
            assign(sV, getXMMReg(rE));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            delta += alen;
            imm8 = getUChar(delta);
            DIP("vpermilps $%u,%s,%s\n",
                imm8, dis_buf, nameXMMReg(rG));
            assign(sV, loadLE(Ity_V128, mkexpr(addr)));
         }
         delta++;
         putYMMRegLoAndZU(rG, mkexpr ( math_VPERMILPS_128 ( sV, imm8 ) ) );
         goto decode_success;
      }
      break;

   case 0x05:
      /* VPERMILPD imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 05 /r ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar  modrm = getUChar(delta);
         UInt   imm8  = 0;
         UInt   rG    = gregOfRexRM(pfx, modrm);
         IRTemp sV    = newTemp(Ity_V128);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta += 1;
            imm8 = getUChar(delta);
            DIP("vpermilpd $%u,%s,%s\n",
                imm8, nameXMMReg(rE), nameXMMReg(rG));
            assign(sV, getXMMReg(rE));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            delta += alen;
            imm8 = getUChar(delta);
            DIP("vpermilpd $%u,%s,%s\n",
                imm8, dis_buf, nameXMMReg(rG));
            assign(sV, loadLE(Ity_V128, mkexpr(addr)));
         }
         delta++;
         IRTemp s1 = newTemp(Ity_I64);
         IRTemp s0 = newTemp(Ity_I64);
         assign(s1, unop(Iop_V128HIto64, mkexpr(sV)));
         assign(s0, unop(Iop_V128to64,   mkexpr(sV)));
         IRTemp dV = newTemp(Ity_V128);
         assign(dV, binop(Iop_64HLtoV128,
                          mkexpr((imm8 & (1<<1)) ? s1 : s0),
                          mkexpr((imm8 & (1<<0)) ? s1 : s0)));
         putYMMRegLoAndZU(rG, mkexpr(dV));
         goto decode_success;
      }
      /* VPERMILPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 05 /r ib */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar  modrm = getUChar(delta);
         UInt   imm8  = 0;
         UInt   rG    = gregOfRexRM(pfx, modrm);
         IRTemp sV    = newTemp(Ity_V256);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta += 1;
            imm8 = getUChar(delta);
            DIP("vpermilpd $%u,%s,%s\n",
                imm8, nameYMMReg(rE), nameYMMReg(rG));
            assign(sV, getYMMReg(rE));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            delta += alen;
            imm8 = getUChar(delta);
            DIP("vpermilpd $%u,%s,%s\n",
                imm8, dis_buf, nameYMMReg(rG));
            assign(sV, loadLE(Ity_V256, mkexpr(addr)));
         }
         delta++;
         IRTemp s3, s2, s1, s0;
         s3 = s2 = s1 = s0 = IRTemp_INVALID;
         breakupV256to64s(sV, &s3, &s2, &s1, &s0);
         IRTemp dV = newTemp(Ity_V256);
         assign(dV, IRExpr_Qop(Iop_64x4toV256,
                               mkexpr((imm8 & (1<<3)) ? s3 : s2),
                               mkexpr((imm8 & (1<<2)) ? s3 : s2),
                               mkexpr((imm8 & (1<<1)) ? s1 : s0),
                               mkexpr((imm8 & (1<<0)) ? s1 : s0)));
         putYMMReg(rG, mkexpr(dV));
         goto decode_success;
      }
      break;

   case 0x06:
   case 0x06:
      /* VPERM2F128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 06 /r ib */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
         UChar  modrm = getUChar(delta);
         UInt   imm8  = 0;
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp s00   = newTemp(Ity_V128);
         IRTemp s01   = newTemp(Ity_V128);
         IRTemp s10   = newTemp(Ity_V128);
         IRTemp s11   = newTemp(Ity_V128);
         assign(s00, getYMMRegLane128(rV, 0));
         assign(s01, getYMMRegLane128(rV, 1));
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta += 1;
            imm8 = getUChar(delta);
            DIP("vperm2f128 $%u,%s,%s,%s\n",
                imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
            assign(s10, getYMMRegLane128(rE, 0));
            assign(s11, getYMMRegLane128(rE, 1));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            delta += alen;
            imm8 = getUChar(delta);
            DIP("vperm2f128 $%u,%s,%s,%s\n",
                imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
            assign(s10, loadLE(Ity_V128, binop(Iop_Add64,
                                               mkexpr(addr), mkU64(0))));
            assign(s11, loadLE(Ity_V128, binop(Iop_Add64,
                                               mkexpr(addr), mkU64(16))));
         }
         delta++;
#        define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
                                           : ((_nn)==2) ? s10 : s11)
         putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3)));
         putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3)));
#        undef SEL
         if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0));
         if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0));
         *uses_vvvv = True;
         goto decode_success;
      }
      break;
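      /* Worked example for the SEL() logic above: imm8 = 0x30 puts s00 (the
         low 128 bits of rV) in the result's low lane and s11 (the high 128
         bits of the E operand) in the high lane; since bits 3 and 7 of 0x30
         are clear, neither lane is subsequently zeroed. */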
   case 0x08:
      /* VROUNDPS imm8, xmm2/m128, xmm1 */
      /* VROUNDPS = VEX.NDS.128.66.0F3A.WIG 08 ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         IRTemp src   = newTemp(Ity_V128);
         IRTemp s0    = IRTemp_INVALID;
         IRTemp s1    = IRTemp_INVALID;
         IRTemp s2    = IRTemp_INVALID;
         IRTemp s3    = IRTemp_INVALID;
         IRTemp rm    = newTemp(Ity_I32);
         Int    imm   = 0;

         modrm = getUChar(delta);

         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            assign( src, getXMMReg( rE ) );
            imm = getUChar(delta+1);
            if (imm & ~15) break;
            delta += 1+1;
            DIP( "vroundps $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            assign( src, loadLE(Ity_V128, mkexpr(addr) ) );
            imm = getUChar(delta+alen);
            if (imm & ~15) break;
            delta += alen+1;
            DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) );
         }

         /* (imm & 3) contains an Intel-encoded rounding mode.  Because
            that encoding is the same as the encoding for IRRoundingMode,
            we can use that value directly in the IR as a rounding
            mode. */
         assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));

         breakupV128to32s( src, &s3, &s2, &s1, &s0 );
         putYMMRegLane128( rG, 1, mkV128(0) );
#        define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
                             unop(Iop_ReinterpI32asF32, mkexpr(s)))
         putYMMRegLane32F( rG, 3, CVT(s3) );
         putYMMRegLane32F( rG, 2, CVT(s2) );
         putYMMRegLane32F( rG, 1, CVT(s1) );
         putYMMRegLane32F( rG, 0, CVT(s0) );
#        undef CVT
         goto decode_success;
      }
      /* VROUNDPS imm8, ymm2/m256, ymm1 */
      /* VROUNDPS = VEX.NDS.256.66.0F3A.WIG 08 ib */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         IRTemp src   = newTemp(Ity_V256);
         IRTemp s0    = IRTemp_INVALID;
         IRTemp s1    = IRTemp_INVALID;
         IRTemp s2    = IRTemp_INVALID;
         IRTemp s3    = IRTemp_INVALID;
         IRTemp s4    = IRTemp_INVALID;
         IRTemp s5    = IRTemp_INVALID;
         IRTemp s6    = IRTemp_INVALID;
         IRTemp s7    = IRTemp_INVALID;
         IRTemp rm    = newTemp(Ity_I32);
         Int    imm   = 0;

         modrm = getUChar(delta);

         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            assign( src, getYMMReg( rE ) );
            imm = getUChar(delta+1);
            if (imm & ~15) break;
            delta += 1+1;
            DIP( "vroundps $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            assign( src, loadLE(Ity_V256, mkexpr(addr) ) );
            imm = getUChar(delta+alen);
            if (imm & ~15) break;
            delta += alen+1;
            DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) );
         }

         /* (imm & 3) contains an Intel-encoded rounding mode.  Because
            that encoding is the same as the encoding for IRRoundingMode,
            we can use that value directly in the IR as a rounding
            mode. */
         assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));

         breakupV256to32s( src, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
#        define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
                             unop(Iop_ReinterpI32asF32, mkexpr(s)))
         putYMMRegLane32F( rG, 7, CVT(s7) );
         putYMMRegLane32F( rG, 6, CVT(s6) );
         putYMMRegLane32F( rG, 5, CVT(s5) );
         putYMMRegLane32F( rG, 4, CVT(s4) );
         putYMMRegLane32F( rG, 3, CVT(s3) );
         putYMMRegLane32F( rG, 2, CVT(s2) );
         putYMMRegLane32F( rG, 1, CVT(s1) );
         putYMMRegLane32F( rG, 0, CVT(s0) );
#        undef CVT
         goto decode_success;
      }
      break;
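      /* Note on the rounding-mode handling in the two VROUNDPS cases above:
         the Intel immediate encoding (0 = nearest even, 1 = toward -inf,
         2 = toward +inf, 3 = toward zero) coincides numerically with
         IRRoundingMode, which is why (imm & 3) can be used directly; bit 2
         of the immediate means "use the rounding mode currently in MXCSR",
         hence the call to get_sse_roundingmode() in that case. */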
31030 /* VROUNDPD imm8, xmm2/m128, xmm1 */
31031 /* VROUNDPD = VEX.NDS.128.66.0F3A.WIG 09 ib */
31032 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31033 UChar modrm
= getUChar(delta
);
31034 UInt rG
= gregOfRexRM(pfx
, modrm
);
31035 IRTemp src
= newTemp(Ity_V128
);
31036 IRTemp s0
= IRTemp_INVALID
;
31037 IRTemp s1
= IRTemp_INVALID
;
31038 IRTemp rm
= newTemp(Ity_I32
);
31041 modrm
= getUChar(delta
);
31043 if (epartIsReg(modrm
)) {
31044 UInt rE
= eregOfRexRM(pfx
, modrm
);
31045 assign( src
, getXMMReg( rE
) );
31046 imm
= getUChar(delta
+1);
31047 if (imm
& ~15) break;
31049 DIP( "vroundpd $%d,%s,%s\n", imm
, nameXMMReg(rE
), nameXMMReg(rG
) );
31051 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31052 assign( src
, loadLE(Ity_V128
, mkexpr(addr
) ) );
31053 imm
= getUChar(delta
+alen
);
31054 if (imm
& ~15) break;
31056 DIP( "vroundpd $%d,%s,%s\n", imm
, dis_buf
, nameXMMReg(rG
) );
31059 /* (imm & 3) contains an Intel-encoded rounding mode. Because
31060 that encoding is the same as the encoding for IRRoundingMode,
31061 we can use that value directly in the IR as a rounding
31063 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
31065 breakupV128to64s( src
, &s1
, &s0
);
31066 putYMMRegLane128( rG
, 1, mkV128(0) );
31067 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
31068 unop(Iop_ReinterpI64asF64, mkexpr(s)))
31069 putYMMRegLane64F( rG
, 1, CVT(s1
) );
31070 putYMMRegLane64F( rG
, 0, CVT(s0
) );
31072 goto decode_success
;
31074 /* VROUNDPD imm8, ymm2/m256, ymm1 */
31075 /* VROUNDPD = VEX.NDS.256.66.0F3A.WIG 09 ib */
31076 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31077 UChar modrm
= getUChar(delta
);
31078 UInt rG
= gregOfRexRM(pfx
, modrm
);
31079 IRTemp src
= newTemp(Ity_V256
);
31080 IRTemp s0
= IRTemp_INVALID
;
31081 IRTemp s1
= IRTemp_INVALID
;
31082 IRTemp s2
= IRTemp_INVALID
;
31083 IRTemp s3
= IRTemp_INVALID
;
31084 IRTemp rm
= newTemp(Ity_I32
);
31087 modrm
= getUChar(delta
);
31089 if (epartIsReg(modrm
)) {
31090 UInt rE
= eregOfRexRM(pfx
, modrm
);
31091 assign( src
, getYMMReg( rE
) );
31092 imm
= getUChar(delta
+1);
31093 if (imm
& ~15) break;
31095 DIP( "vroundpd $%d,%s,%s\n", imm
, nameYMMReg(rE
), nameYMMReg(rG
) );
31097 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31098 assign( src
, loadLE(Ity_V256
, mkexpr(addr
) ) );
31099 imm
= getUChar(delta
+alen
);
31100 if (imm
& ~15) break;
31102 DIP( "vroundpd $%d,%s,%s\n", imm
, dis_buf
, nameYMMReg(rG
) );
31105 /* (imm & 3) contains an Intel-encoded rounding mode. Because
31106 that encoding is the same as the encoding for IRRoundingMode,
31107 we can use that value directly in the IR as a rounding
31109 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
31111 breakupV256to64s( src
, &s3
, &s2
, &s1
, &s0
);
31112 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
31113 unop(Iop_ReinterpI64asF64, mkexpr(s)))
31114 putYMMRegLane64F( rG
, 3, CVT(s3
) );
31115 putYMMRegLane64F( rG
, 2, CVT(s2
) );
31116 putYMMRegLane64F( rG
, 1, CVT(s1
) );
31117 putYMMRegLane64F( rG
, 0, CVT(s0
) );
31119 goto decode_success
;
31125 /* VROUNDSS imm8, xmm3/m32, xmm2, xmm1 */
31126 /* VROUNDSS = VEX.NDS.128.66.0F3A.WIG 0A ib */
31127 /* VROUNDSD imm8, xmm3/m64, xmm2, xmm1 */
31128 /* VROUNDSD = VEX.NDS.128.66.0F3A.WIG 0B ib */
31129 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31130 UChar modrm
= getUChar(delta
);
31131 UInt rG
= gregOfRexRM(pfx
, modrm
);
31132 UInt rV
= getVexNvvvv(pfx
);
31133 Bool isD
= opc
== 0x0B;
31134 IRTemp src
= newTemp(isD
? Ity_F64
: Ity_F32
);
31135 IRTemp res
= newTemp(isD
? Ity_F64
: Ity_F32
);
31138 if (epartIsReg(modrm
)) {
31139 UInt rE
= eregOfRexRM(pfx
, modrm
);
31141 isD
? getXMMRegLane64F(rE
, 0) : getXMMRegLane32F(rE
, 0) );
31142 imm
= getUChar(delta
+1);
31143 if (imm
& ~15) break;
31145 DIP( "vrounds%c $%d,%s,%s,%s\n",
31147 imm
, nameXMMReg( rE
), nameXMMReg( rV
), nameXMMReg( rG
) );
31149 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31150 assign( src
, loadLE( isD
? Ity_F64
: Ity_F32
, mkexpr(addr
) ));
31151 imm
= getUChar(delta
+alen
);
31152 if (imm
& ~15) break;
31154 DIP( "vrounds%c $%d,%s,%s,%s\n",
31156 imm
, dis_buf
, nameXMMReg( rV
), nameXMMReg( rG
) );
31159 /* (imm & 3) contains an Intel-encoded rounding mode. Because
31160 that encoding is the same as the encoding for IRRoundingMode,
31161 we can use that value directly in the IR as a rounding
31163 assign(res
, binop(isD
? Iop_RoundF64toInt
: Iop_RoundF32toInt
,
31164 (imm
& 4) ? get_sse_roundingmode()
31169 putXMMRegLane64F( rG
, 0, mkexpr(res
) );
31171 putXMMRegLane32F( rG
, 0, mkexpr(res
) );
31172 putXMMRegLane32F( rG
, 1, getXMMRegLane32F( rV
, 1 ) );
31174 putXMMRegLane64F( rG
, 1, getXMMRegLane64F( rV
, 1 ) );
31175 putYMMRegLane128( rG
, 1, mkV128(0) );
31177 goto decode_success
;
31182 /* VBLENDPS imm8, ymm3/m256, ymm2, ymm1 */
31183 /* VBLENDPS = VEX.NDS.256.66.0F3A.WIG 0C /r ib */
31184 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31185 UChar modrm
= getUChar(delta
);
31187 UInt rG
= gregOfRexRM(pfx
, modrm
);
31188 UInt rV
= getVexNvvvv(pfx
);
31189 IRTemp sV
= newTemp(Ity_V256
);
31190 IRTemp sE
= newTemp(Ity_V256
);
31191 assign ( sV
, getYMMReg(rV
) );
31192 if (epartIsReg(modrm
)) {
31193 UInt rE
= eregOfRexRM(pfx
, modrm
);
31195 imm8
= getUChar(delta
);
31196 DIP("vblendps $%u,%s,%s,%s\n",
31197 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
31198 assign(sE
, getYMMReg(rE
));
31200 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31202 imm8
= getUChar(delta
);
31203 DIP("vblendps $%u,%s,%s,%s\n",
31204 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
31205 assign(sE
, loadLE(Ity_V256
, mkexpr(addr
)));
31209 mkexpr( math_BLENDPS_256( sE
, sV
, imm8
) ) );
31211 goto decode_success
;
31213 /* VBLENDPS imm8, xmm3/m128, xmm2, xmm1 */
31214 /* VBLENDPS = VEX.NDS.128.66.0F3A.WIG 0C /r ib */
31215 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31216 UChar modrm
= getUChar(delta
);
31218 UInt rG
= gregOfRexRM(pfx
, modrm
);
31219 UInt rV
= getVexNvvvv(pfx
);
31220 IRTemp sV
= newTemp(Ity_V128
);
31221 IRTemp sE
= newTemp(Ity_V128
);
31222 assign ( sV
, getXMMReg(rV
) );
31223 if (epartIsReg(modrm
)) {
31224 UInt rE
= eregOfRexRM(pfx
, modrm
);
31226 imm8
= getUChar(delta
);
31227 DIP("vblendps $%u,%s,%s,%s\n",
31228 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
31229 assign(sE
, getXMMReg(rE
));
31231 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31233 imm8
= getUChar(delta
);
31234 DIP("vblendps $%u,%s,%s,%s\n",
31235 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
31236 assign(sE
, loadLE(Ity_V128
, mkexpr(addr
)));
31239 putYMMRegLoAndZU( rG
,
31240 mkexpr( math_BLENDPS_128( sE
, sV
, imm8
) ) );
31242 goto decode_success
;
31247 /* VBLENDPD imm8, ymm3/m256, ymm2, ymm1 */
31248 /* VBLENDPD = VEX.NDS.256.66.0F3A.WIG 0D /r ib */
31249 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31250 UChar modrm
= getUChar(delta
);
31252 UInt rG
= gregOfRexRM(pfx
, modrm
);
31253 UInt rV
= getVexNvvvv(pfx
);
31254 IRTemp sV
= newTemp(Ity_V256
);
31255 IRTemp sE
= newTemp(Ity_V256
);
31256 assign ( sV
, getYMMReg(rV
) );
31257 if (epartIsReg(modrm
)) {
31258 UInt rE
= eregOfRexRM(pfx
, modrm
);
31260 imm8
= getUChar(delta
);
31261 DIP("vblendpd $%u,%s,%s,%s\n",
31262 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
31263 assign(sE
, getYMMReg(rE
));
31265 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31267 imm8
= getUChar(delta
);
31268 DIP("vblendpd $%u,%s,%s,%s\n",
31269 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
31270 assign(sE
, loadLE(Ity_V256
, mkexpr(addr
)));
31274 mkexpr( math_BLENDPD_256( sE
, sV
, imm8
) ) );
31276 goto decode_success
;
31278 /* VBLENDPD imm8, xmm3/m128, xmm2, xmm1 */
31279 /* VBLENDPD = VEX.NDS.128.66.0F3A.WIG 0D /r ib */
31280 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31281 UChar modrm
= getUChar(delta
);
31283 UInt rG
= gregOfRexRM(pfx
, modrm
);
31284 UInt rV
= getVexNvvvv(pfx
);
31285 IRTemp sV
= newTemp(Ity_V128
);
31286 IRTemp sE
= newTemp(Ity_V128
);
31287 assign ( sV
, getXMMReg(rV
) );
31288 if (epartIsReg(modrm
)) {
31289 UInt rE
= eregOfRexRM(pfx
, modrm
);
31291 imm8
= getUChar(delta
);
31292 DIP("vblendpd $%u,%s,%s,%s\n",
31293 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
31294 assign(sE
, getXMMReg(rE
));
31296 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31298 imm8
= getUChar(delta
);
31299 DIP("vblendpd $%u,%s,%s,%s\n",
31300 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
31301 assign(sE
, loadLE(Ity_V128
, mkexpr(addr
)));
31304 putYMMRegLoAndZU( rG
,
31305 mkexpr( math_BLENDPD_128( sE
, sV
, imm8
) ) );
31307 goto decode_success
;
31312 /* VPBLENDW imm8, xmm3/m128, xmm2, xmm1 */
31313 /* VPBLENDW = VEX.NDS.128.66.0F3A.WIG 0E /r ib */
31314 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31315 UChar modrm
= getUChar(delta
);
31317 UInt rG
= gregOfRexRM(pfx
, modrm
);
31318 UInt rV
= getVexNvvvv(pfx
);
31319 IRTemp sV
= newTemp(Ity_V128
);
31320 IRTemp sE
= newTemp(Ity_V128
);
31321 assign ( sV
, getXMMReg(rV
) );
31322 if (epartIsReg(modrm
)) {
31323 UInt rE
= eregOfRexRM(pfx
, modrm
);
31325 imm8
= getUChar(delta
);
31326 DIP("vpblendw $%u,%s,%s,%s\n",
31327 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
31328 assign(sE
, getXMMReg(rE
));
31330 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31332 imm8
= getUChar(delta
);
31333 DIP("vpblendw $%u,%s,%s,%s\n",
31334 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
31335 assign(sE
, loadLE(Ity_V128
, mkexpr(addr
)));
31338 putYMMRegLoAndZU( rG
,
31339 mkexpr( math_PBLENDW_128( sE
, sV
, imm8
) ) );
31341 goto decode_success
;
31343 /* VPBLENDW imm8, ymm3/m256, ymm2, ymm1 */
31344 /* VPBLENDW = VEX.NDS.256.66.0F3A.WIG 0E /r ib */
31345 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31346 UChar modrm
= getUChar(delta
);
31348 UInt rG
= gregOfRexRM(pfx
, modrm
);
31349 UInt rV
= getVexNvvvv(pfx
);
31350 IRTemp sV
= newTemp(Ity_V256
);
31351 IRTemp sE
= newTemp(Ity_V256
);
31352 IRTemp sVhi
, sVlo
, sEhi
, sElo
;
31353 sVhi
= sVlo
= sEhi
= sElo
= IRTemp_INVALID
;
31354 assign ( sV
, getYMMReg(rV
) );
31355 if (epartIsReg(modrm
)) {
31356 UInt rE
= eregOfRexRM(pfx
, modrm
);
31358 imm8
= getUChar(delta
);
31359 DIP("vpblendw $%u,%s,%s,%s\n",
31360 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
31361 assign(sE
, getYMMReg(rE
));
31363 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31365 imm8
= getUChar(delta
);
31366 DIP("vpblendw $%u,%s,%s,%s\n",
31367 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
31368 assign(sE
, loadLE(Ity_V256
, mkexpr(addr
)));
31371 breakupV256toV128s( sV
, &sVhi
, &sVlo
);
31372 breakupV256toV128s( sE
, &sEhi
, &sElo
);
31373 putYMMReg( rG
, binop( Iop_V128HLtoV256
,
31374 mkexpr( math_PBLENDW_128( sEhi
, sVhi
, imm8
) ),
31375 mkexpr( math_PBLENDW_128( sElo
, sVlo
, imm8
) ) ) );
31377 goto decode_success
;
31382 /* VPALIGNR imm8, xmm3/m128, xmm2, xmm1 */
31383 /* VPALIGNR = VEX.NDS.128.66.0F3A.WIG 0F /r ib */
31384 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31385 UChar modrm
= getUChar(delta
);
31386 UInt rG
= gregOfRexRM(pfx
, modrm
);
31387 UInt rV
= getVexNvvvv(pfx
);
31388 IRTemp sV
= newTemp(Ity_V128
);
31389 IRTemp dV
= newTemp(Ity_V128
);
31392 assign( dV
, getXMMReg(rV
) );
31394 if ( epartIsReg( modrm
) ) {
31395 UInt rE
= eregOfRexRM(pfx
, modrm
);
31396 assign( sV
, getXMMReg(rE
) );
31397 imm8
= getUChar(delta
+1);
31399 DIP("vpalignr $%u,%s,%s,%s\n", imm8
, nameXMMReg(rE
),
31400 nameXMMReg(rV
), nameXMMReg(rG
));
31402 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31403 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
31404 imm8
= getUChar(delta
+alen
);
31406 DIP("vpalignr $%u,%s,%s,%s\n", imm8
, dis_buf
,
31407 nameXMMReg(rV
), nameXMMReg(rG
));
31410 IRTemp res
= math_PALIGNR_XMM( sV
, dV
, imm8
);
31411 putYMMRegLoAndZU( rG
, mkexpr(res
) );
31413 goto decode_success
;
31415 /* VPALIGNR imm8, ymm3/m256, ymm2, ymm1 */
31416 /* VPALIGNR = VEX.NDS.256.66.0F3A.WIG 0F /r ib */
31417 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31418 UChar modrm
= getUChar(delta
);
31419 UInt rG
= gregOfRexRM(pfx
, modrm
);
31420 UInt rV
= getVexNvvvv(pfx
);
31421 IRTemp sV
= newTemp(Ity_V256
);
31422 IRTemp dV
= newTemp(Ity_V256
);
31423 IRTemp sHi
, sLo
, dHi
, dLo
;
31424 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
31427 assign( dV
, getYMMReg(rV
) );
31429 if ( epartIsReg( modrm
) ) {
31430 UInt rE
= eregOfRexRM(pfx
, modrm
);
31431 assign( sV
, getYMMReg(rE
) );
31432 imm8
= getUChar(delta
+1);
31434 DIP("vpalignr $%u,%s,%s,%s\n", imm8
, nameYMMReg(rE
),
31435 nameYMMReg(rV
), nameYMMReg(rG
));
31437 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31438 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
31439 imm8
= getUChar(delta
+alen
);
31441 DIP("vpalignr $%u,%s,%s,%s\n", imm8
, dis_buf
,
31442 nameYMMReg(rV
), nameYMMReg(rG
));
31445 breakupV256toV128s( dV
, &dHi
, &dLo
);
31446 breakupV256toV128s( sV
, &sHi
, &sLo
);
31447 putYMMReg( rG
, binop( Iop_V128HLtoV256
,
31448 mkexpr( math_PALIGNR_XMM( sHi
, dHi
, imm8
) ),
31449 mkexpr( math_PALIGNR_XMM( sLo
, dLo
, imm8
) ) )
31452 goto decode_success
;
      /* VPEXTRB imm8, xmm2, reg/m8 = VEX.128.66.0F3A.W0 14 /r ib */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
         delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x15:
      /* VPEXTRW imm8, reg/m16, xmm2 */
      /* VPEXTRW = VEX.128.66.0F3A.W0 15 /r ib */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
         delta = dis_PEXTRW( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x16:
      /* VPEXTRD imm8, r32/m32, xmm2 */
      /* VPEXTRD = VEX.128.66.0F3A.W0 16 /r ib */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
         delta = dis_PEXTRD( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VPEXTRQ = VEX.128.66.0F3A.W1 16 /r ib */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
         delta = dis_PEXTRQ( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x17:
      /* VEXTRACTPS imm8, xmm1, r32/m32 = VEX.128.66.0F3A.WIG 17 /r ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_EXTRACTPS( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      break;
31500 /* VINSERTF128 r/m, rV, rD
31501 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
31502 /* VINSERTF128 = VEX.NDS.256.66.0F3A.W0 18 /r ib */
31503 if (have66noF2noF3(pfx
)
31504 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
31505 UChar modrm
= getUChar(delta
);
31507 UInt rG
= gregOfRexRM(pfx
, modrm
);
31508 UInt rV
= getVexNvvvv(pfx
);
31509 IRTemp t128
= newTemp(Ity_V128
);
31510 if (epartIsReg(modrm
)) {
31511 UInt rE
= eregOfRexRM(pfx
, modrm
);
31513 assign(t128
, getXMMReg(rE
));
31514 ib
= getUChar(delta
);
31515 DIP("vinsertf128 $%u,%s,%s,%s\n",
31516 ib
, nameXMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
31518 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31519 assign(t128
, loadLE(Ity_V128
, mkexpr(addr
)));
31521 ib
= getUChar(delta
);
31522 DIP("vinsertf128 $%u,%s,%s,%s\n",
31523 ib
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
31526 putYMMRegLane128(rG
, 0, getYMMRegLane128(rV
, 0));
31527 putYMMRegLane128(rG
, 1, getYMMRegLane128(rV
, 1));
31528 putYMMRegLane128(rG
, ib
& 1, mkexpr(t128
));
31530 goto decode_success
;
31535 /* VEXTRACTF128 $lane_no, rS, r/m
31536 ::: r/m:V128 = a lane of rS:V256 (RM format) */
31537 /* VEXTRACTF128 = VEX.256.66.0F3A.W0 19 /r ib */
31538 if (have66noF2noF3(pfx
)
31539 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
31540 UChar modrm
= getUChar(delta
);
31542 UInt rS
= gregOfRexRM(pfx
, modrm
);
31543 IRTemp t128
= newTemp(Ity_V128
);
31544 if (epartIsReg(modrm
)) {
31545 UInt rD
= eregOfRexRM(pfx
, modrm
);
31547 ib
= getUChar(delta
);
31548 assign(t128
, getYMMRegLane128(rS
, ib
& 1));
31549 putYMMRegLoAndZU(rD
, mkexpr(t128
));
31550 DIP("vextractf128 $%u,%s,%s\n",
31551 ib
, nameXMMReg(rS
), nameYMMReg(rD
));
31553 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31555 ib
= getUChar(delta
);
31556 assign(t128
, getYMMRegLane128(rS
, ib
& 1));
31557 storeLE(mkexpr(addr
), mkexpr(t128
));
31558 DIP("vextractf128 $%u,%s,%s\n",
31559 ib
, nameYMMReg(rS
), dis_buf
);
31562 /* doesn't use vvvv */
31563 goto decode_success
;
      /* VCVTPS2PH imm8, xmm2, xmm1/m64 = VEX.128.66.0F3A.W0 1D /r ib */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/
          && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
         delta = dis_VCVTPS2PH( vbi, pfx, delta, /*is256bit=*/False );
         goto decode_success;
      }
      /* VCVTPS2PH imm8, ymm2, ymm1/m128 = VEX.256.66.0F3A.W0 1D /r ib */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/
          && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
         delta = dis_VCVTPS2PH( vbi, pfx, delta, /*is256bit=*/True );
         goto decode_success;
      }
      break;
31585 /* VPINSRB r32/m8, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 20 /r ib */
31586 if (have66noF2noF3(pfx
)
31587 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
31588 UChar modrm
= getUChar(delta
);
31589 UInt rG
= gregOfRexRM(pfx
, modrm
);
31590 UInt rV
= getVexNvvvv(pfx
);
31592 IRTemp src_u8
= newTemp(Ity_I8
);
31594 if ( epartIsReg( modrm
) ) {
31595 UInt rE
= eregOfRexRM(pfx
,modrm
);
31596 imm8
= (Int
)(getUChar(delta
+1) & 15);
31597 assign( src_u8
, unop(Iop_32to8
, getIReg32( rE
)) );
31599 DIP( "vpinsrb $%d,%s,%s,%s\n",
31600 imm8
, nameIReg32(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31602 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31603 imm8
= (Int
)(getUChar(delta
+alen
) & 15);
31604 assign( src_u8
, loadLE( Ity_I8
, mkexpr(addr
) ) );
31606 DIP( "vpinsrb $%d,%s,%s,%s\n",
31607 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31610 IRTemp src_vec
= newTemp(Ity_V128
);
31611 assign(src_vec
, getXMMReg( rV
));
31612 IRTemp res_vec
= math_PINSRB_128( src_vec
, src_u8
, imm8
);
31613 putYMMRegLoAndZU( rG
, mkexpr(res_vec
) );
31615 goto decode_success
;
31620 /* VINSERTPS imm8, xmm3/m32, xmm2, xmm1
31621 = VEX.NDS.128.66.0F3A.WIG 21 /r ib */
31622 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31623 UChar modrm
= getUChar(delta
);
31624 UInt rG
= gregOfRexRM(pfx
, modrm
);
31625 UInt rV
= getVexNvvvv(pfx
);
31627 IRTemp d2ins
= newTemp(Ity_I32
); /* comes from the E part */
31628 const IRTemp inval
= IRTemp_INVALID
;
31630 if ( epartIsReg( modrm
) ) {
31631 UInt rE
= eregOfRexRM(pfx
, modrm
);
31632 IRTemp vE
= newTemp(Ity_V128
);
31633 assign( vE
, getXMMReg(rE
) );
31634 IRTemp dsE
[4] = { inval
, inval
, inval
, inval
};
31635 breakupV128to32s( vE
, &dsE
[3], &dsE
[2], &dsE
[1], &dsE
[0] );
31636 imm8
= getUChar(delta
+1);
31637 d2ins
= dsE
[(imm8
>> 6) & 3]; /* "imm8_count_s" */
31639 DIP( "insertps $%u, %s,%s\n",
31640 imm8
, nameXMMReg(rE
), nameXMMReg(rG
) );
31642 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31643 assign( d2ins
, loadLE( Ity_I32
, mkexpr(addr
) ) );
31644 imm8
= getUChar(delta
+alen
);
31646 DIP( "insertps $%u, %s,%s\n",
31647 imm8
, dis_buf
, nameXMMReg(rG
) );
31650 IRTemp vV
= newTemp(Ity_V128
);
31651 assign( vV
, getXMMReg(rV
) );
31653 putYMMRegLoAndZU( rG
, mkexpr(math_INSERTPS( vV
, d2ins
, imm8
)) );
31655 goto decode_success
;
31660 /* VPINSRD r32/m32, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 22 /r ib */
31661 if (have66noF2noF3(pfx
)
31662 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
31663 UChar modrm
= getUChar(delta
);
31664 UInt rG
= gregOfRexRM(pfx
, modrm
);
31665 UInt rV
= getVexNvvvv(pfx
);
31667 IRTemp src_u32
= newTemp(Ity_I32
);
31669 if ( epartIsReg( modrm
) ) {
31670 UInt rE
= eregOfRexRM(pfx
,modrm
);
31671 imm8_10
= (Int
)(getUChar(delta
+1) & 3);
31672 assign( src_u32
, getIReg32( rE
) );
31674 DIP( "vpinsrd $%d,%s,%s,%s\n",
31675 imm8_10
, nameIReg32(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31677 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31678 imm8_10
= (Int
)(getUChar(delta
+alen
) & 3);
31679 assign( src_u32
, loadLE( Ity_I32
, mkexpr(addr
) ) );
31681 DIP( "vpinsrd $%d,%s,%s,%s\n",
31682 imm8_10
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31685 IRTemp src_vec
= newTemp(Ity_V128
);
31686 assign(src_vec
, getXMMReg( rV
));
31687 IRTemp res_vec
= math_PINSRD_128( src_vec
, src_u32
, imm8_10
);
31688 putYMMRegLoAndZU( rG
, mkexpr(res_vec
) );
31690 goto decode_success
;
31692 /* VPINSRQ r64/m64, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W1 22 /r ib */
31693 if (have66noF2noF3(pfx
)
31694 && 0==getVexL(pfx
)/*128*/ && 1==getRexW(pfx
)/*W1*/) {
31695 UChar modrm
= getUChar(delta
);
31696 UInt rG
= gregOfRexRM(pfx
, modrm
);
31697 UInt rV
= getVexNvvvv(pfx
);
31699 IRTemp src_u64
= newTemp(Ity_I64
);
31701 if ( epartIsReg( modrm
) ) {
31702 UInt rE
= eregOfRexRM(pfx
,modrm
);
31703 imm8_0
= (Int
)(getUChar(delta
+1) & 1);
31704 assign( src_u64
, getIReg64( rE
) );
31706 DIP( "vpinsrq $%d,%s,%s,%s\n",
31707 imm8_0
, nameIReg64(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31709 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31710 imm8_0
= (Int
)(getUChar(delta
+alen
) & 1);
31711 assign( src_u64
, loadLE( Ity_I64
, mkexpr(addr
) ) );
31713 DIP( "vpinsrq $%d,%s,%s,%s\n",
31714 imm8_0
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31717 IRTemp src_vec
= newTemp(Ity_V128
);
31718 assign(src_vec
, getXMMReg( rV
));
31719 IRTemp res_vec
= math_PINSRQ_128( src_vec
, src_u64
, imm8_0
);
31720 putYMMRegLoAndZU( rG
, mkexpr(res_vec
) );
31722 goto decode_success
;
31727 /* VINSERTI128 r/m, rV, rD
31728 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
31729 /* VINSERTI128 = VEX.NDS.256.66.0F3A.W0 38 /r ib */
31730 if (have66noF2noF3(pfx
)
31731 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
31732 UChar modrm
= getUChar(delta
);
31734 UInt rG
= gregOfRexRM(pfx
, modrm
);
31735 UInt rV
= getVexNvvvv(pfx
);
31736 IRTemp t128
= newTemp(Ity_V128
);
31737 if (epartIsReg(modrm
)) {
31738 UInt rE
= eregOfRexRM(pfx
, modrm
);
31740 assign(t128
, getXMMReg(rE
));
31741 ib
= getUChar(delta
);
31742 DIP("vinserti128 $%u,%s,%s,%s\n",
31743 ib
, nameXMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
31745 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31746 assign(t128
, loadLE(Ity_V128
, mkexpr(addr
)));
31748 ib
= getUChar(delta
);
31749 DIP("vinserti128 $%u,%s,%s,%s\n",
31750 ib
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
31753 putYMMRegLane128(rG
, 0, getYMMRegLane128(rV
, 0));
31754 putYMMRegLane128(rG
, 1, getYMMRegLane128(rV
, 1));
31755 putYMMRegLane128(rG
, ib
& 1, mkexpr(t128
));
31757 goto decode_success
;
31762 /* VEXTRACTI128 $lane_no, rS, r/m
31763 ::: r/m:V128 = a lane of rS:V256 (RM format) */
31764 /* VEXTRACTI128 = VEX.256.66.0F3A.W0 39 /r ib */
31765 if (have66noF2noF3(pfx
)
31766 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
31767 UChar modrm
= getUChar(delta
);
31769 UInt rS
= gregOfRexRM(pfx
, modrm
);
31770 IRTemp t128
= newTemp(Ity_V128
);
31771 if (epartIsReg(modrm
)) {
31772 UInt rD
= eregOfRexRM(pfx
, modrm
);
31774 ib
= getUChar(delta
);
31775 assign(t128
, getYMMRegLane128(rS
, ib
& 1));
31776 putYMMRegLoAndZU(rD
, mkexpr(t128
));
31777 DIP("vextracti128 $%u,%s,%s\n",
31778 ib
, nameXMMReg(rS
), nameYMMReg(rD
));
31780 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31782 ib
= getUChar(delta
);
31783 assign(t128
, getYMMRegLane128(rS
, ib
& 1));
31784 storeLE(mkexpr(addr
), mkexpr(t128
));
31785 DIP("vextracti128 $%u,%s,%s\n",
31786 ib
, nameYMMReg(rS
), dis_buf
);
31789 /* doesn't use vvvv */
31790 goto decode_success
;
31795 /* VDPPS imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 40 /r ib */
31796 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31797 UChar modrm
= getUChar(delta
);
31798 UInt rG
= gregOfRexRM(pfx
, modrm
);
31799 UInt rV
= getVexNvvvv(pfx
);
31800 IRTemp dst_vec
= newTemp(Ity_V128
);
31802 if (epartIsReg( modrm
)) {
31803 UInt rE
= eregOfRexRM(pfx
,modrm
);
31804 imm8
= (Int
)getUChar(delta
+1);
31805 assign( dst_vec
, getXMMReg( rE
) );
31807 DIP( "vdpps $%d,%s,%s,%s\n",
31808 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31810 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31811 imm8
= (Int
)getUChar(delta
+alen
);
31812 assign( dst_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
31814 DIP( "vdpps $%d,%s,%s,%s\n",
31815 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31818 IRTemp src_vec
= newTemp(Ity_V128
);
31819 assign(src_vec
, getXMMReg( rV
));
31820 IRTemp res_vec
= math_DPPS_128( src_vec
, dst_vec
, imm8
);
31821 putYMMRegLoAndZU( rG
, mkexpr(res_vec
) );
31823 goto decode_success
;
31825 /* VDPPS imm8, ymm3/m128,ymm2,ymm1 = VEX.NDS.256.66.0F3A.WIG 40 /r ib */
31826 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31827 UChar modrm
= getUChar(delta
);
31828 UInt rG
= gregOfRexRM(pfx
, modrm
);
31829 UInt rV
= getVexNvvvv(pfx
);
31830 IRTemp dst_vec
= newTemp(Ity_V256
);
31832 if (epartIsReg( modrm
)) {
31833 UInt rE
= eregOfRexRM(pfx
,modrm
);
31834 imm8
= (Int
)getUChar(delta
+1);
31835 assign( dst_vec
, getYMMReg( rE
) );
31837 DIP( "vdpps $%d,%s,%s,%s\n",
31838 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
) );
31840 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31841 imm8
= (Int
)getUChar(delta
+alen
);
31842 assign( dst_vec
, loadLE( Ity_V256
, mkexpr(addr
) ) );
31844 DIP( "vdpps $%d,%s,%s,%s\n",
31845 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
) );
31848 IRTemp src_vec
= newTemp(Ity_V256
);
31849 assign(src_vec
, getYMMReg( rV
));
31850 IRTemp s0
, s1
, d0
, d1
;
31851 s0
= s1
= d0
= d1
= IRTemp_INVALID
;
31852 breakupV256toV128s( dst_vec
, &d1
, &d0
);
31853 breakupV256toV128s( src_vec
, &s1
, &s0
);
31854 putYMMReg( rG
, binop( Iop_V128HLtoV256
,
31855 mkexpr( math_DPPS_128(s1
, d1
, imm8
) ),
31856 mkexpr( math_DPPS_128(s0
, d0
, imm8
) ) ) );
31858 goto decode_success
;
31863 /* VDPPD imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 41 /r ib */
31864 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31865 UChar modrm
= getUChar(delta
);
31866 UInt rG
= gregOfRexRM(pfx
, modrm
);
31867 UInt rV
= getVexNvvvv(pfx
);
31868 IRTemp dst_vec
= newTemp(Ity_V128
);
31870 if (epartIsReg( modrm
)) {
31871 UInt rE
= eregOfRexRM(pfx
,modrm
);
31872 imm8
= (Int
)getUChar(delta
+1);
31873 assign( dst_vec
, getXMMReg( rE
) );
31875 DIP( "vdppd $%d,%s,%s,%s\n",
31876 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31878 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31879 imm8
= (Int
)getUChar(delta
+alen
);
31880 assign( dst_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
31882 DIP( "vdppd $%d,%s,%s,%s\n",
31883 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31886 IRTemp src_vec
= newTemp(Ity_V128
);
31887 assign(src_vec
, getXMMReg( rV
));
31888 IRTemp res_vec
= math_DPPD_128( src_vec
, dst_vec
, imm8
);
31889 putYMMRegLoAndZU( rG
, mkexpr(res_vec
) );
31891 goto decode_success
;
31896 /* VMPSADBW imm8, xmm3/m128,xmm2,xmm1 */
31897 /* VMPSADBW = VEX.NDS.128.66.0F3A.WIG 42 /r ib */
31898 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31899 UChar modrm
= getUChar(delta
);
31901 IRTemp src_vec
= newTemp(Ity_V128
);
31902 IRTemp dst_vec
= newTemp(Ity_V128
);
31903 UInt rG
= gregOfRexRM(pfx
, modrm
);
31904 UInt rV
= getVexNvvvv(pfx
);
31906 assign( dst_vec
, getXMMReg(rV
) );
31908 if ( epartIsReg( modrm
) ) {
31909 UInt rE
= eregOfRexRM(pfx
, modrm
);
31911 imm8
= (Int
)getUChar(delta
+1);
31912 assign( src_vec
, getXMMReg(rE
) );
31914 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8
,
31915 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31917 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
31918 1/* imm8 is 1 byte after the amode */ );
31919 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
31920 imm8
= (Int
)getUChar(delta
+alen
);
31922 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8
,
31923 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31926 putYMMRegLoAndZU( rG
, mkexpr( math_MPSADBW_128(dst_vec
,
31927 src_vec
, imm8
) ) );
31929 goto decode_success
;
31931 /* VMPSADBW imm8, ymm3/m256,ymm2,ymm1 */
31932 /* VMPSADBW = VEX.NDS.256.66.0F3A.WIG 42 /r ib */
31933 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31934 UChar modrm
= getUChar(delta
);
31936 IRTemp src_vec
= newTemp(Ity_V256
);
31937 IRTemp dst_vec
= newTemp(Ity_V256
);
31938 UInt rG
= gregOfRexRM(pfx
, modrm
);
31939 UInt rV
= getVexNvvvv(pfx
);
31940 IRTemp sHi
, sLo
, dHi
, dLo
;
31941 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
31943 assign( dst_vec
, getYMMReg(rV
) );
31945 if ( epartIsReg( modrm
) ) {
31946 UInt rE
= eregOfRexRM(pfx
, modrm
);
31948 imm8
= (Int
)getUChar(delta
+1);
31949 assign( src_vec
, getYMMReg(rE
) );
31951 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8
,
31952 nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
) );
31954 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
31955 1/* imm8 is 1 byte after the amode */ );
31956 assign( src_vec
, loadLE( Ity_V256
, mkexpr(addr
) ) );
31957 imm8
= (Int
)getUChar(delta
+alen
);
31959 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8
,
31960 dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
) );
31963 breakupV256toV128s( dst_vec
, &dHi
, &dLo
);
31964 breakupV256toV128s( src_vec
, &sHi
, &sLo
);
31965 putYMMReg( rG
, binop( Iop_V128HLtoV256
,
31966 mkexpr( math_MPSADBW_128(dHi
, sHi
, imm8
>> 3) ),
31967 mkexpr( math_MPSADBW_128(dLo
, sLo
, imm8
) ) ) );
31969 goto decode_success
;
31974 /* VPCLMULQDQ imm8, xmm3/m128,xmm2,xmm1 */
31975 /* VPCLMULQDQ = VEX.NDS.128.66.0F3A.WIG 44 /r ib */
31976 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
31977 * Carry-less multiplication of selected XMM quadwords into XMM
31978 * registers (a.k.a multiplication of polynomials over GF(2))
31980 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31981 UChar modrm
= getUChar(delta
);
31983 IRTemp sV
= newTemp(Ity_V128
);
31984 IRTemp dV
= newTemp(Ity_V128
);
31985 UInt rG
= gregOfRexRM(pfx
, modrm
);
31986 UInt rV
= getVexNvvvv(pfx
);
31988 assign( dV
, getXMMReg(rV
) );
31990 if ( epartIsReg( modrm
) ) {
31991 UInt rE
= eregOfRexRM(pfx
, modrm
);
31992 imm8
= (Int
)getUChar(delta
+1);
31993 assign( sV
, getXMMReg(rE
) );
31995 DIP( "vpclmulqdq $%d, %s,%s,%s\n", imm8
,
31996 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31998 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
31999 1/* imm8 is 1 byte after the amode */ );
32000 assign( sV
, loadLE( Ity_V128
, mkexpr(addr
) ) );
32001 imm8
= (Int
)getUChar(delta
+alen
);
32003 DIP( "vpclmulqdq $%d, %s,%s,%s\n",
32004 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
32007 putYMMRegLoAndZU( rG
, mkexpr( math_PCLMULQDQ(dV
, sV
, imm8
) ) );
32009 goto decode_success
;
32014 /* VPERM2I128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 46 /r ib */
32015 if (have66noF2noF3(pfx
)
32016 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
32017 UChar modrm
= getUChar(delta
);
32019 UInt rG
= gregOfRexRM(pfx
, modrm
);
32020 UInt rV
= getVexNvvvv(pfx
);
32021 IRTemp s00
= newTemp(Ity_V128
);
32022 IRTemp s01
= newTemp(Ity_V128
);
32023 IRTemp s10
= newTemp(Ity_V128
);
32024 IRTemp s11
= newTemp(Ity_V128
);
32025 assign(s00
, getYMMRegLane128(rV
, 0));
32026 assign(s01
, getYMMRegLane128(rV
, 1));
32027 if (epartIsReg(modrm
)) {
32028 UInt rE
= eregOfRexRM(pfx
, modrm
);
32030 imm8
= getUChar(delta
);
32031 DIP("vperm2i128 $%u,%s,%s,%s\n",
32032 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
32033 assign(s10
, getYMMRegLane128(rE
, 0));
32034 assign(s11
, getYMMRegLane128(rE
, 1));
32036 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
32038 imm8
= getUChar(delta
);
32039 DIP("vperm2i128 $%u,%s,%s,%s\n",
32040 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
32041 assign(s10
, loadLE(Ity_V128
, binop(Iop_Add64
,
32042 mkexpr(addr
), mkU64(0))));
32043 assign(s11
, loadLE(Ity_V128
, binop(Iop_Add64
,
32044 mkexpr(addr
), mkU64(16))));
32047 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
32048 : ((_nn)==2) ? s10 : s11)
32049 putYMMRegLane128(rG
, 0, mkexpr(SEL((imm8
>> 0) & 3)));
32050 putYMMRegLane128(rG
, 1, mkexpr(SEL((imm8
>> 4) & 3)));
32052 if (imm8
& (1<<3)) putYMMRegLane128(rG
, 0, mkV128(0));
32053 if (imm8
& (1<<7)) putYMMRegLane128(rG
, 1, mkV128(0));
32055 goto decode_success
;
      /* VBLENDVPS xmmG, xmmE/memE, xmmV, xmmIS4
         ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
      /* VBLENDVPS = VEX.NDS.128.66.0F3A.WIG 4A /r /is4 */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VBLENDV_128 ( vbi, pfx, delta,
                                   "vblendvps", 4, Iop_SarN32x4 );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VBLENDVPS ymmG, ymmE/memE, ymmV, ymmIS4
         ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
      /* VBLENDVPS = VEX.NDS.256.66.0F3A.WIG 4A /r /is4 */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VBLENDV_256 ( vbi, pfx, delta,
                                   "vblendvps", 4, Iop_SarN32x4 );
         *uses_vvvv = True;
         goto decode_success;
      }
      break;

   case 0x4B:
      /* VBLENDVPD xmmG, xmmE/memE, xmmV, xmmIS4
         ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
      /* VBLENDVPD = VEX.NDS.128.66.0F3A.WIG 4B /r /is4 */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VBLENDV_128 ( vbi, pfx, delta,
                                   "vblendvpd", 8, Iop_SarN64x2 );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VBLENDVPD ymmG, ymmE/memE, ymmV, ymmIS4
         ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
      /* VBLENDVPD = VEX.NDS.256.66.0F3A.WIG 4B /r /is4 */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VBLENDV_256 ( vbi, pfx, delta,
                                   "vblendvpd", 8, Iop_SarN64x2 );
         *uses_vvvv = True;
         goto decode_success;
      }
      break;

   case 0x4C:
      /* VPBLENDVB xmmG, xmmE/memE, xmmV, xmmIS4
         ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
      /* VPBLENDVB = VEX.NDS.128.66.0F3A.WIG 4C /r /is4 */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VBLENDV_128 ( vbi, pfx, delta,
                                   "vpblendvb", 1, Iop_SarN8x16 );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VPBLENDVB ymmG, ymmE/memE, ymmV, ymmIS4
         ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
      /* VPBLENDVB = VEX.NDS.256.66.0F3A.WIG 4C /r /is4 */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VBLENDV_256 ( vbi, pfx, delta,
                                   "vpblendvb", 1, Iop_SarN8x16 );
         *uses_vvvv = True;
         goto decode_success;
      }
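      /* Note on the three VBLENDV variants above: the IS4 register supplies
         a per-element mask and selection is by the top bit of each element.
         The element width (4, 8 or 1 bytes) is paired with the matching
         vector arithmetic shift (Iop_SarN32x4, Iop_SarN64x2, Iop_SarN8x16),
         which dis_VBLENDV_128/256 presumably use to smear each element's
         sign bit into a full-width select mask. */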
      break;

   case 0x60:
   case 0x61:
   case 0x62:
   case 0x63:
      /* VEX.128.66.0F3A.WIG 63 /r ib = VPCMPISTRI imm8, xmm2/m128, xmm1
         VEX.128.66.0F3A.WIG 62 /r ib = VPCMPISTRM imm8, xmm2/m128, xmm1
         VEX.128.66.0F3A.WIG 61 /r ib = VPCMPESTRI imm8, xmm2/m128, xmm1
         VEX.128.66.0F3A.WIG 60 /r ib = VPCMPESTRM imm8, xmm2/m128, xmm1
         (selected special cases that actually occur in glibc,
          not by any means a complete implementation.)
      */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         Long delta0 = delta;
         delta = dis_PCMPxSTRx( vbi, pfx, delta, True/*isAvx*/, opc );
         if (delta > delta0) goto decode_success;
         /* else fall through; dis_PCMPxSTRx failed to decode it */
      }
      break;
   case 0x5C ... 0x5F:
   case 0x68 ... 0x6F:
   case 0x78 ... 0x7F:
      /* FIXME: list the instructions decoded here */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         Long delta0 = delta;
         delta = dis_FMA4( pfx, delta, opc, uses_vvvv, vbi );
         if (delta > delta0) {
            dres->hint = Dis_HintVerbose;
            goto decode_success;
         }
         /* else fall through; dis_FMA4 failed to decode it */
      }
      break;
   case 0xDF:
      /* VAESKEYGENASSIST imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG DF /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AESKEYGENASSIST( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      break;
   case 0xF0:
      /* RORX imm8, r/m32, r32a = VEX.LZ.F2.0F3A.W0 F0 /r /i */
      /* RORX imm8, r/m64, r64a = VEX.LZ.F2.0F3A.W1 F0 /r /i */
      if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
         Int     size = getRexW(pfx) ? 8 : 4;
         IRType  ty   = szToITy(size);
         IRTemp  src  = newTemp(ty);
         UChar   rm   = getUChar(delta);
         UChar   imm8;

         if (epartIsReg(rm)) {
            imm8 = getUChar(delta+1);
            assign( src, getIRegE(size,pfx,rm) );
            DIP("rorx %d,%s,%s\n", imm8, nameIRegE(size,pfx,rm),
                                   nameIRegG(size,pfx,rm));
            delta += 2;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
            imm8 = getUChar(delta+alen);
            assign( src, loadLE(ty, mkexpr(addr)) );
            DIP("rorx %d,%s,%s\n", imm8, dis_buf, nameIRegG(size,pfx,rm));
            delta += alen + 1;
         }
         imm8 &= 8*size-1;

         /* dst = (src >>u imm8) | (src << (8*size-imm8)) */
         putIRegG( size, pfx, rm,
                   imm8 == 0 ? mkexpr(src)
                   : binop( mkSizedOp(ty,Iop_Or8),
                            binop( mkSizedOp(ty,Iop_Shr8), mkexpr(src),
                                   mkU8(imm8) ),
                            binop( mkSizedOp(ty,Iop_Shl8), mkexpr(src),
                                   mkU8(8*size-imm8) ) ) );
         /* Flags aren't modified. */
         goto decode_success;
      }
      break;
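      /* Worked example of the expression above: for the 32-bit form
         (size == 4) with imm8 == 8, the result is (src >>u 8) | (src << 24),
         a rotate right by 8 -- e.g. src = 0x11223344 gives 0x44112233. */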
32215 /*------------------------------------------------------------*/
32217 /*--- Disassemble a single instruction ---*/
32219 /*------------------------------------------------------------*/
32221 /* Disassemble a single instruction into IR. The instruction is
32222 located in host memory at &guest_code[delta]. */
32225 DisResult
disInstr_AMD64_WRK (
32226 /*OUT*/Bool
* expect_CAS
,
32228 const VexArchInfo
* archinfo
,
32229 const VexAbiInfo
* vbi
,
32238 /* The running delta */
32239 Long delta
= delta64
;
32241 /* Holds eip at the start of the insn, so that we can print
32242 consistent error messages for unimplemented insns. */
32243 Long delta_start
= delta
;
32245 /* sz denotes the nominal data-op size of the insn; we change it to
32246 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of
32247 conflict REX.W takes precedence. */
32250 /* pfx holds the summary of prefixes. */
32251 Prefix pfx
= PFX_EMPTY
;
32253 /* Holds the computed opcode-escape indication. */
32254 Escape esc
= ESC_NONE
;
32256 /* Set result defaults. */
32257 dres
.whatNext
= Dis_Continue
;
32259 dres
.jk_StopHere
= Ijk_INVALID
;
32260 dres
.hint
= Dis_HintNone
;
32261 *expect_CAS
= False
;
32263 vassert(guest_RIP_next_assumed
== 0);
32264 vassert(guest_RIP_next_mustcheck
== False
);
32266 t1
= t2
= IRTemp_INVALID
;
32268 DIP("\t0x%llx: ", guest_RIP_bbstart
+delta
);
   /* Spot "Special" instructions (see comment at top of file). */
   {
      const UChar* code = guest_code + delta;
      /* Spot the 16-byte preamble:
         48C1C703   rolq $3,  %rdi
         48C1C70D   rolq $13, %rdi
         48C1C73D   rolq $61, %rdi
         48C1C733   rolq $51, %rdi
      */
      if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7
                           && code[ 3] == 0x03 &&
          code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7
                           && code[ 7] == 0x0D &&
          code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7
                           && code[11] == 0x3D &&
          code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7
                           && code[15] == 0x33) {
         /* Got a "Special" instruction preamble.  Which one is it? */
         if (code[16] == 0x48 && code[17] == 0x87
                              && code[18] == 0xDB /* xchgq %rbx,%rbx */) {
            /* %RDX = client_request ( %RAX ) */
            DIP("%%rdx = client_request ( %%rax )\n");
            delta += 19;
            jmp_lit(&dres, Ijk_ClientReq, guest_RIP_bbstart+delta);
            vassert(dres.whatNext == Dis_StopHere);
            goto decode_success;
         }
         else
         if (code[16] == 0x48 && code[17] == 0x87
                              && code[18] == 0xC9 /* xchgq %rcx,%rcx */) {
            /* %RAX = guest_NRADDR */
            DIP("%%rax = guest_NRADDR\n");
            delta += 19;
            putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            goto decode_success;
         }
         else
         if (code[16] == 0x48 && code[17] == 0x87
                              && code[18] == 0xD2 /* xchgq %rdx,%rdx */) {
            /* call-noredir *%RAX */
            DIP("call-noredir *%%rax\n");
            delta += 19;
            t1 = newTemp(Ity_I64);
            assign(t1, getIRegRAX(8));
            t2 = newTemp(Ity_I64);
            assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
            putIReg64(R_RSP, mkexpr(t2));
            storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta));
            jmp_treg(&dres, Ijk_NoRedir, t1);
            vassert(dres.whatNext == Dis_StopHere);
            goto decode_success;
         }
         else
         if (code[16] == 0x48 && code[17] == 0x87
                              && code[18] == 0xff /* xchgq %rdi,%rdi */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);

            // Invalidate the current insn. The reason is that the IRop we're
            // injecting here can change. In which case the translation has to
            // be redone. For ease of handling, we simply invalidate all the
            // time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_RIP_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(19)));

            delta += 19;

            stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
            dres.whatNext    = Dis_StopHere;
            dres.jk_StopHere = Ijk_InvalICache;
            goto decode_success;
         }
         /* We don't know what it is. */
         goto decode_failure;
         /*NOTREACHED*/
      }
   }
32349 /* Eat prefixes, summarising the result in pfx and sz, and rejecting
32350 as many invalid combinations as possible. */
32353 if (n_prefixes
> 7) goto decode_failure
;
32354 pre
= getUChar(delta
);
32356 case 0x66: pfx
|= PFX_66
; break;
32357 case 0x67: pfx
|= PFX_ASO
; break;
32358 case 0xF2: pfx
|= PFX_F2
; break;
32359 case 0xF3: pfx
|= PFX_F3
; break;
32360 case 0xF0: pfx
|= PFX_LOCK
; *expect_CAS
= True
; break;
32361 case 0x2E: pfx
|= PFX_CS
; break;
32362 case 0x3E: pfx
|= PFX_DS
; break;
32363 case 0x26: pfx
|= PFX_ES
; break;
32364 case 0x64: pfx
|= PFX_FS
; break;
32365 case 0x65: pfx
|= PFX_GS
; break;
32366 case 0x36: pfx
|= PFX_SS
; break;
32367 case 0x40 ... 0x4F:
32369 if (pre
& (1<<3)) pfx
|= PFX_REXW
;
32370 if (pre
& (1<<2)) pfx
|= PFX_REXR
;
32371 if (pre
& (1<<1)) pfx
|= PFX_REXX
;
32372 if (pre
& (1<<0)) pfx
|= PFX_REXB
;
32375 goto not_a_legacy_prefix
;
32381 not_a_legacy_prefix
:
32382 /* We've used up all the non-VEX prefixes. Parse and validate a
32383 VEX prefix if that's appropriate. */
32384 if (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
) {
32385 /* Used temporarily for holding VEX prefixes. */
32386 UChar vex0
= getUChar(delta
);
32387 if (vex0
== 0xC4) {
32389 UChar vex1
= getUChar(delta
+1);
32390 UChar vex2
= getUChar(delta
+2);
32393 /* Snarf contents of byte 1 */
32394 /* R */ pfx
|= (vex1
& (1<<7)) ? 0 : PFX_REXR
;
32395 /* X */ pfx
|= (vex1
& (1<<6)) ? 0 : PFX_REXX
;
32396 /* B */ pfx
|= (vex1
& (1<<5)) ? 0 : PFX_REXB
;
32398 switch (vex1
& 0x1F) {
32399 case 1: esc
= ESC_0F
; break;
32400 case 2: esc
= ESC_0F38
; break;
32401 case 3: esc
= ESC_0F3A
; break;
32402 /* Any other m-mmmm field will #UD */
32403 default: goto decode_failure
;
32405 /* Snarf contents of byte 2 */
32406 /* W */ pfx
|= (vex2
& (1<<7)) ? PFX_REXW
: 0;
32407 /* ~v3 */ pfx
|= (vex2
& (1<<6)) ? 0 : PFX_VEXnV3
;
32408 /* ~v2 */ pfx
|= (vex2
& (1<<5)) ? 0 : PFX_VEXnV2
;
32409 /* ~v1 */ pfx
|= (vex2
& (1<<4)) ? 0 : PFX_VEXnV1
;
32410 /* ~v0 */ pfx
|= (vex2
& (1<<3)) ? 0 : PFX_VEXnV0
;
32411 /* L */ pfx
|= (vex2
& (1<<2)) ? PFX_VEXL
: 0;
32413 switch (vex2
& 3) {
32415 case 1: pfx
|= PFX_66
; break;
32416 case 2: pfx
|= PFX_F3
; break;
32417 case 3: pfx
|= PFX_F2
; break;
32418 default: vassert(0);
32421 else if (vex0
== 0xC5) {
32423 UChar vex1
= getUChar(delta
+1);
32426 /* Snarf contents of byte 1 */
32427 /* R */ pfx
|= (vex1
& (1<<7)) ? 0 : PFX_REXR
;
32428 /* ~v3 */ pfx
|= (vex1
& (1<<6)) ? 0 : PFX_VEXnV3
;
32429 /* ~v2 */ pfx
|= (vex1
& (1<<5)) ? 0 : PFX_VEXnV2
;
32430 /* ~v1 */ pfx
|= (vex1
& (1<<4)) ? 0 : PFX_VEXnV1
;
32431 /* ~v0 */ pfx
|= (vex1
& (1<<3)) ? 0 : PFX_VEXnV0
;
32432 /* L */ pfx
|= (vex1
& (1<<2)) ? PFX_VEXL
: 0;
32434 switch (vex1
& 3) {
32436 case 1: pfx
|= PFX_66
; break;
32437 case 2: pfx
|= PFX_F3
; break;
32438 case 3: pfx
|= PFX_F2
; break;
32439 default: vassert(0);
32444 /* Can't have both VEX and REX */
32445 if ((pfx
& PFX_VEX
) && (pfx
& PFX_REX
))
32446 goto decode_failure
; /* can't have both */
32449 /* Dump invalid combinations */
32451 if (pfx
& PFX_F2
) n
++;
32452 if (pfx
& PFX_F3
) n
++;
32454 goto decode_failure
; /* can't have both */
32457 if (pfx
& PFX_CS
) n
++;
32458 if (pfx
& PFX_DS
) n
++;
32459 if (pfx
& PFX_ES
) n
++;
32460 if (pfx
& PFX_FS
) n
++;
32461 if (pfx
& PFX_GS
) n
++;
32462 if (pfx
& PFX_SS
) n
++;
32464 goto decode_failure
; /* multiple seg overrides == illegal */
32466 /* We have a %fs prefix. Reject it if there's no evidence in 'vbi'
32467 that we should accept it. */
32468 if ((pfx
& PFX_FS
) && !vbi
->guest_amd64_assume_fs_is_const
)
32469 goto decode_failure
;
32471 /* Ditto for %gs prefixes. */
32472 if ((pfx
& PFX_GS
) && !vbi
->guest_amd64_assume_gs_is_const
)
32473 goto decode_failure
;
32477 if (pfx
& PFX_66
) sz
= 2;
32478 if ((pfx
& PFX_REX
) && (pfx
& PFX_REXW
)) sz
= 8;
32480 /* Now we should be looking at the primary opcode byte or the
32481 leading escapes. Check that any LOCK prefix is actually
32483 if (haveLOCK(pfx
)) {
32484 if (can_be_used_with_LOCK_prefix( &guest_code
[delta
] )) {
32487 *expect_CAS
= False
;
32488 goto decode_failure
;
32492 /* Eat up opcode escape bytes, until we're really looking at the
32493 primary opcode byte. But only if there's no VEX present. */
32494 if (!(pfx
& PFX_VEX
)) {
32495 vassert(esc
== ESC_NONE
);
32496 pre
= getUChar(delta
);
32499 pre
= getUChar(delta
);
32501 case 0x38: esc
= ESC_0F38
; delta
++; break;
32502 case 0x3A: esc
= ESC_0F3A
; delta
++; break;
32503 default: esc
= ESC_0F
; break;
32508 /* So now we're really really looking at the primary opcode
32510 Long delta_at_primary_opcode
= delta
;
32512 if (!(pfx
& PFX_VEX
)) {
32513 /* Handle non-VEX prefixed instructions. "Legacy" (non-VEX) SSE
32514 instructions preserve the upper 128 bits of YMM registers;
32515 iow we can simply ignore the presence of the upper halves of
32516 these registers. */
32519 delta
= dis_ESC_NONE( &dres
, expect_CAS
,
32520 archinfo
, vbi
, pfx
, sz
, delta
);
32523 delta
= dis_ESC_0F ( &dres
, expect_CAS
,
32524 archinfo
, vbi
, pfx
, sz
, delta
);
32527 delta
= dis_ESC_0F38( &dres
,
32528 archinfo
, vbi
, pfx
, sz
, delta
);
32531 delta
= dis_ESC_0F3A( &dres
,
32532 archinfo
, vbi
, pfx
, sz
, delta
);
32538 /* VEX prefixed instruction */
32539 /* Sloppy Intel wording: "An instruction encoded with a VEX.128
32540 prefix that loads a YMM register operand ..." zeroes out bits
32541 128 and above of the register. */
32542 Bool uses_vvvv
= False
;
32545 delta
= dis_ESC_0F__VEX ( &dres
, &uses_vvvv
,
32546 archinfo
, vbi
, pfx
, sz
, delta
);
32549 delta
= dis_ESC_0F38__VEX ( &dres
, &uses_vvvv
,
32550 archinfo
, vbi
, pfx
, sz
, delta
);
32553 delta
= dis_ESC_0F3A__VEX ( &dres
, &uses_vvvv
,
32554 archinfo
, vbi
, pfx
, sz
, delta
);
32557 /* The presence of a VEX prefix, by Intel definition,
32558 always implies at least an 0F escape. */
32559 goto decode_failure
;
32563 /* If the insn doesn't use VEX.vvvv then it must be all ones.
32566 if (getVexNvvvv(pfx
) != 0)
32567 goto decode_failure
;
32571 vassert(delta
- delta_at_primary_opcode
>= 0);
32572 vassert(delta
- delta_at_primary_opcode
< 16/*let's say*/);
32574 /* Use delta == delta_at_primary_opcode to denote decode failure.
32575 This implies that any successful decode must use at least one
32577 if (delta
== delta_at_primary_opcode
)
32578 goto decode_failure
;
32580 goto decode_success
; /* \o/ */
32584 /* All decode failures end up here. */
32586 vex_printf("vex amd64->IR: unhandled instruction bytes: "
32587 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
32588 getUChar(delta_start
+0),
32589 getUChar(delta_start
+1),
32590 getUChar(delta_start
+2),
32591 getUChar(delta_start
+3),
32592 getUChar(delta_start
+4),
32593 getUChar(delta_start
+5),
32594 getUChar(delta_start
+6),
32595 getUChar(delta_start
+7),
32596 getUChar(delta_start
+8),
32597 getUChar(delta_start
+9) );
32598 vex_printf("vex amd64->IR: REX=%d REX.W=%d REX.R=%d REX.X=%d REX.B=%d\n",
32599 haveREX(pfx
) ? 1 : 0, getRexW(pfx
), getRexR(pfx
),
32600 getRexX(pfx
), getRexB(pfx
));
32601 vex_printf("vex amd64->IR: VEX=%d VEX.L=%d VEX.nVVVV=0x%x ESC=%s\n",
32602 haveVEX(pfx
) ? 1 : 0, getVexL(pfx
),
32604 esc
==ESC_NONE
? "NONE" :
32605 esc
==ESC_0F
? "0F" :
32606 esc
==ESC_0F38
? "0F38" :
32607 esc
==ESC_0F3A
? "0F3A" : "???");
32608 vex_printf("vex amd64->IR: PFX.66=%d PFX.F2=%d PFX.F3=%d\n",
32609 have66(pfx
) ? 1 : 0, haveF2(pfx
) ? 1 : 0,
32610 haveF3(pfx
) ? 1 : 0);
32613 /* Tell the dispatcher that this insn cannot be decoded, and so has
32614 not been executed, and (is currently) the next to be executed.
32615 RIP should be up-to-date since it made so at the start of each
32616 insn, but nevertheless be paranoid and update it again right
32618 stmt( IRStmt_Put( OFFB_RIP
, mkU64(guest_RIP_curr_instr
) ) );
32619 jmp_lit(&dres
, Ijk_NoDecode
, guest_RIP_curr_instr
);
32620 vassert(dres
.whatNext
== Dis_StopHere
);
32622 /* We also need to say that a CAS is not expected now, regardless
32623 of what it might have been set to at the start of the function,
32624 since the IR that we've emitted just above (to synthesis a
32625 SIGILL) does not involve any CAS, and presumably no other IR has
32626 been emitted for this (non-decoded) insn. */
32627 *expect_CAS
= False
;
32632 /* All decode successes end up here. */
32633 switch (dres
.whatNext
) {
32635 stmt( IRStmt_Put( OFFB_RIP
, mkU64(guest_RIP_bbstart
+ delta
) ) );
32644 dres
.len
= toUInt(delta
- delta_start
);
32652 /*------------------------------------------------------------*/
32653 /*--- Top-level fn ---*/
32654 /*------------------------------------------------------------*/
32656 /* Disassemble a single instruction into IR. The instruction
32657 is located in host memory at &guest_code[delta]. */
32659 DisResult
disInstr_AMD64 ( IRSB
* irsb_IN
,
32660 const UChar
* guest_code_IN
,
32663 VexArch guest_arch
,
32664 const VexArchInfo
* archinfo
,
32665 const VexAbiInfo
* abiinfo
,
32666 VexEndness host_endness_IN
,
32667 Bool sigill_diag_IN
)
32670 Bool expect_CAS
, has_CAS
;
32673 /* Set globals (see top of this file) */
32674 vassert(guest_arch
== VexArchAMD64
);
32675 guest_code
= guest_code_IN
;
32677 host_endness
= host_endness_IN
;
32678 guest_RIP_curr_instr
= guest_IP
;
32679 guest_RIP_bbstart
= guest_IP
- delta
;
32681 /* We'll consult these after doing disInstr_AMD64_WRK. */
32682 guest_RIP_next_assumed
= 0;
32683 guest_RIP_next_mustcheck
= False
;
32685 x1
= irsb_IN
->stmts_used
;
32686 expect_CAS
= False
;
32687 dres
= disInstr_AMD64_WRK ( &expect_CAS
,
32688 delta
, archinfo
, abiinfo
, sigill_diag_IN
);
32689 x2
= irsb_IN
->stmts_used
;
32692 /* If disInstr_AMD64_WRK tried to figure out the next rip, check it
32693 got it right. Failure of this assertion is serious and denotes
32694 a bug in disInstr. */
32695 if (guest_RIP_next_mustcheck
32696 && guest_RIP_next_assumed
!= guest_RIP_curr_instr
+ dres
.len
) {
32698 vex_printf(" current %%rip = 0x%llx\n",
32699 guest_RIP_curr_instr
);
32700 vex_printf("assumed next %%rip = 0x%llx\n",
32701 guest_RIP_next_assumed
);
32702 vex_printf(" actual next %%rip = 0x%llx\n",
32703 guest_RIP_curr_instr
+ dres
.len
);
32704 vex_printf("instruction bytes: "
32705 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
32715 getUChar(delta
+9) );
32717 /* re-disassemble the instruction so as
32718 to generate a useful error message; then assert. */
32719 vex_traceflags
|= VEX_TRACE_FE
;
32720 guest_RIP_next_assumed
= 0;
32721 guest_RIP_next_mustcheck
= False
;
32722 dres
= disInstr_AMD64_WRK ( &expect_CAS
,
32723 delta
, archinfo
, abiinfo
, sigill_diag_IN
);
32724 vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
32727 /* See comment at the top of disInstr_AMD64_WRK for meaning of
32728 expect_CAS. Here, we (sanity-)check for the presence/absence of
32729 IRCAS as directed by the returned expect_CAS value. */
32731 for (i
= x1
; i
< x2
; i
++) {
32732 if (irsb_IN
->stmts
[i
]->tag
== Ist_CAS
)
32736 if (expect_CAS
!= has_CAS
) {
32737 /* inconsistency detected. re-disassemble the instruction so as
32738 to generate a useful error message; then assert. */
32739 vex_traceflags
|= VEX_TRACE_FE
;
32740 dres
= disInstr_AMD64_WRK ( &expect_CAS
,
32741 delta
, archinfo
, abiinfo
, sigill_diag_IN
);
32742 for (i
= x1
; i
< x2
; i
++) {
32743 vex_printf("\t\t");
32744 ppIRStmt(irsb_IN
->stmts
[i
]);
32747 /* Failure of this assertion is serious and denotes a bug in
32749 vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
32756 /*------------------------------------------------------------*/
32757 /*--- Unused stuff ---*/
32758 /*------------------------------------------------------------*/
32760 // A potentially more Memcheck-friendly version of gen_LZCNT, if
32761 // this should ever be needed.
32763 //static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
32765 // /* Scheme is simple: propagate the most significant 1-bit into all
32766 // lower positions in the word. This gives a word of the form
32767 // 0---01---1. Now invert it, giving a word of the form
32768 // 1---10---0, then do a population-count idiom (to count the 1s,
32769 // which is the number of leading zeroes, or the word size if the
// original word was 0).
32774 // for (i = 0; i < 7; i++) {
32775 // t[i] = newTemp(ty);
32777 // if (ty == Ity_I64) {
32778 // assign(t[0], binop(Iop_Or64, mkexpr(src),
32779 // binop(Iop_Shr64, mkexpr(src), mkU8(1))));
32780 // assign(t[1], binop(Iop_Or64, mkexpr(t[0]),
32781 // binop(Iop_Shr64, mkexpr(t[0]), mkU8(2))));
32782 // assign(t[2], binop(Iop_Or64, mkexpr(t[1]),
32783 // binop(Iop_Shr64, mkexpr(t[1]), mkU8(4))));
32784 // assign(t[3], binop(Iop_Or64, mkexpr(t[2]),
32785 // binop(Iop_Shr64, mkexpr(t[2]), mkU8(8))));
32786 // assign(t[4], binop(Iop_Or64, mkexpr(t[3]),
32787 // binop(Iop_Shr64, mkexpr(t[3]), mkU8(16))));
32788 // assign(t[5], binop(Iop_Or64, mkexpr(t[4]),
32789 // binop(Iop_Shr64, mkexpr(t[4]), mkU8(32))));
32790 // assign(t[6], unop(Iop_Not64, mkexpr(t[5])));
32791 // return gen_POPCOUNT(ty, t[6]);
32793 // if (ty == Ity_I32) {
32794 // assign(t[0], binop(Iop_Or32, mkexpr(src),
32795 // binop(Iop_Shr32, mkexpr(src), mkU8(1))));
32796 // assign(t[1], binop(Iop_Or32, mkexpr(t[0]),
32797 // binop(Iop_Shr32, mkexpr(t[0]), mkU8(2))));
32798 // assign(t[2], binop(Iop_Or32, mkexpr(t[1]),
32799 // binop(Iop_Shr32, mkexpr(t[1]), mkU8(4))));
32800 // assign(t[3], binop(Iop_Or32, mkexpr(t[2]),
32801 // binop(Iop_Shr32, mkexpr(t[2]), mkU8(8))));
32802 // assign(t[4], binop(Iop_Or32, mkexpr(t[3]),
32803 // binop(Iop_Shr32, mkexpr(t[3]), mkU8(16))));
32804 // assign(t[5], unop(Iop_Not32, mkexpr(t[4])));
32805 // return gen_POPCOUNT(ty, t[5]);
32807 // if (ty == Ity_I16) {
32808 // assign(t[0], binop(Iop_Or16, mkexpr(src),
32809 // binop(Iop_Shr16, mkexpr(src), mkU8(1))));
32810 // assign(t[1], binop(Iop_Or16, mkexpr(t[0]),
32811 // binop(Iop_Shr16, mkexpr(t[0]), mkU8(2))));
32812 // assign(t[2], binop(Iop_Or16, mkexpr(t[1]),
32813 // binop(Iop_Shr16, mkexpr(t[1]), mkU8(4))));
32814 // assign(t[3], binop(Iop_Or16, mkexpr(t[2]),
32815 // binop(Iop_Shr16, mkexpr(t[2]), mkU8(8))));
32816 // assign(t[4], unop(Iop_Not16, mkexpr(t[3])));
32817 // return gen_POPCOUNT(ty, t[4]);
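// Worked example for the I16 variant above: src = 0x00F0 propagates to
// 0x00FF after the or/shift chain, inverts to 0xFF00, and the popcount
// then gives 8, which is indeed the number of leading zeroes in 0x00F0.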
32823 /*--------------------------------------------------------------------*/
32824 /*--- end guest_amd64_toIR.c ---*/
32825 /*--------------------------------------------------------------------*/