/*--------------------------------------------------------------------*/
/*--- begin                                     guest_amd64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
/* Translates AMD64 code to IR. */

/* TODO:

   All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
   to ensure a 64-bit value is being written.

   x87 FP Limitations:

   * all arithmetic done at 64 bits

   * no FP exceptions, except for handling stack over/underflow

   * FP rounding mode observed only for float->int conversions and
     int->float conversions which could lose accuracy, and for
     float-to-float rounding.  For all other operations,
     round-to-nearest is used, regardless.

   * some of the FCOM cases could do with testing -- not convinced
     that the args are the right way round.

   * FSAVE does not re-initialise the FPU; it should do

   * FINIT not only initialises the FPU environment, it also zeroes
     all the FP registers.  It should leave the registers unchanged.

   SAHF should cause eflags[1] == 1, and in fact it produces 0.  As
   per Intel docs this bit has no meaning anyway.  Since PUSHF is the
   only way to observe eflags[1], a proper fix would be to make that
   bit be set by PUSHF.

   This module uses global variables and so is not MT-safe (if that
   should ever become relevant).
*/
/* Notes re address size overrides (0x67).

   According to the AMD documentation (24594 Rev 3.09, Sept 2003,
   "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
   and System Instructions"), Section 1.2.3 ("Address-Size Override
   Prefix"):

   0x67 applies to all explicit memory references, causing the top
   32 bits of the effective address to become zero.

   0x67 has no effect on stack references (push/pop); these always
   use a 64-bit address.

   0x67 changes the interpretation of instructions which implicitly
   reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
   instead.
*/
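/* A minimal sketch of the effect on an explicit memory reference (the
   real handling lives in disAMode):

      ULong ea = <64-bit effective address>;
      if (haveASO(pfx))
         ea &= 0xFFFFFFFFULL;   // top 32 bits forced to zero
*/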
/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
   48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
   $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
   Following that, one of the following 4 are allowed (standard
   interpretation in parentheses):

      4887DB (xchgq %rbx,%rbx)   %RDX = client_request ( %RAX )
      4887C9 (xchgq %rcx,%rcx)   %RAX = guest_NRADDR
      4887D2 (xchgq %rdx,%rdx)   call-noredir *%RAX
      4887F6 (xchgq %rsi,%rsi)   IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.

   No prefixes may precede a "Special" instruction.
*/
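/* For illustration, recognising the preamble amounts to a 16-byte
   compare against the instruction stream (a sketch, in terms of
   getUChar below):

      static const UChar preamble[16]
         = { 0x48,0xC1,0xC7,0x03, 0x48,0xC1,0xC7,0x0D,
             0x48,0xC1,0xC7,0x3D, 0x48,0xC1,0xC7,0x33 };
      Bool isSpecial = True;
      for (i = 0; i < 16; i++)
         if (getUChar(delta + i) != preamble[i]) isSpecial = False;
*/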
/* casLE (implementation of lock-prefixed insns) and rep-prefixed
   insns: the side-exit back to the start of the insn is done with
   Ijk_Boring.  This is quite wrong, it should be done with
   Ijk_NoRedir, since otherwise the side exit, which is intended to
   restart the instruction for whatever reason, could go somewhere
   entirely else.  Doing it right (with Ijk_NoRedir jumps) would make
   no-redir jumps performance critical, at least for rep-prefixed
   instructions, since all iterations thereof would involve such a
   jump.  It's not such a big deal with casLE since the side exit is
   only taken if the CAS fails, that is, the location is contended,
   which is relatively unlikely.

   Note also, the test for CAS success vs failure is done using
   Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
   Iop_Cmp{EQ,NE} equivalents.  This is so as to tell Memcheck that it
   shouldn't definedness-check these comparisons.  See
   COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
   background/rationale.
*/
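/* Concretely, the failure test for a CAS of type ty is therefore built
   as (a sketch -- see casLE below)

      binop( mkSizedOp(ty, Iop_CasCmpNE8),
             mkexpr(oldVal), mkexpr(expVal) )

   rather than with Iop_CmpNE8 and friends. */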
/* LOCK prefixed instructions.  These are translated using IR-level
   CAS statements (IRCAS) and are believed to preserve atomicity, even
   from the point of view of some other process racing against a
   simulated one (presumably they communicate via a shared memory
   segment).

   Handlers which are aware of LOCK prefixes are:
      dis_op2_G_E      (add, or, adc, sbb, and, sub, xor)
      dis_cmpxchg_G_E  (cmpxchg)
      dis_Grp1         (add, or, adc, sbb, and, sub, xor)
      dis_Grp8_Imm     (bts, btc, btr)
      dis_bt_G_E       (bts, btc, btr)
*/
#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_amd64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_generic_x87.h"
#include "guest_amd64_defs.h"
/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an insn, right
   down in disInstr_AMD64, so that we don't have to pass them around
   endlessly.  They are all constant during the translation of any
   given insn. */

/* These are set at the start of the translation of a BB, so
   that we don't have to pass them around endlessly. */

/* We need to know this to do sub-register accesses correctly. */
static VexEndness host_endness;

/* Pointer to the guest code area (points to start of BB, not to the
   insn being processed). */
static const UChar* guest_code;

/* The guest address corresponding to guest_code[0]. */
static Addr64 guest_RIP_bbstart;

/* The guest address for the instruction currently being
   translated. */
static Addr64 guest_RIP_curr_instr;

/* The IRSB* into which we're generating code. */
static IRSB* irsb;

/* For ensuring that %rip-relative addressing is done right.  A read
   of %rip generates the address of the next instruction.  It may be
   that we don't conveniently know that inside disAMode().  For sanity
   checking, if the next insn %rip is needed, we make a guess at what
   it is, record that guess here, and set the accompanying Bool to
   indicate that -- after this insn's decode is finished -- that guess
   needs to be checked. */

/* At the start of each insn decode, is set to (0, False).
   After the decode, if _mustcheck is now True, _assumed is
   checked. */

static Addr64 guest_RIP_next_assumed;
static Bool   guest_RIP_next_mustcheck;
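/* In outline, the protocol described above is (a sketch only):

      guest_RIP_next_mustcheck = False;              // at decode start
      ...
      guest_RIP_next_assumed   = <guessed next rip>; // when a guess is used
      guest_RIP_next_mustcheck = True;
      ...
      // after the decode: if _mustcheck, verify _assumed against the
      // real address of the next instruction
*/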
/*------------------------------------------------------------*/
/*--- Helpers for constructing IR.                         ---*/
/*------------------------------------------------------------*/

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

/* Generate a statement "dst := e". */
static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* mkU8 ( ULong i )
{
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkU16 ( ULong i )
{
   vassert(i < 0x10000ULL);
   return IRExpr_Const(IRConst_U16( (UShort)i ));
}

static IRExpr* mkU32 ( ULong i )
{
   vassert(i < 0x100000000ULL);
   return IRExpr_Const(IRConst_U32( (UInt)i ));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU ( IRType ty, ULong i )
{
   switch (ty) {
      case Ity_I8:  return mkU8(i);
      case Ity_I16: return mkU16(i);
      case Ity_I32: return mkU32(i);
      case Ity_I64: return mkU64(i);
      default: vpanic("mkU(amd64)");
   }
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}
static IROp mkSizedOp ( IRType ty, IROp op8 )
{
   vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
           || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
           || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
           || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
           || op8 == Iop_CasCmpNE8
           || op8 == Iop_Not8);
   switch (ty) {
      case Ity_I8:  return 0 +op8;
      case Ity_I16: return 1 +op8;
      case Ity_I32: return 2 +op8;
      case Ity_I64: return 3 +op8;
      default: vpanic("mkSizedOp(amd64)");
   }
}
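/* Note: the "0/1/2/3 + op8" scheme relies on the IROp enumeration laying
   out the 8/16/32/64-bit variants of each operation consecutively, e.g.
   Iop_Add8, Iop_Add16, Iop_Add32, Iop_Add64.  So, for example,
   mkSizedOp(Ity_I32, Iop_Add8) yields Iop_Add32. */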
static
IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
{
   if (szSmall == 1 && szBig == 4) {
      return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
   }
   if (szSmall == 1 && szBig == 2) {
      return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
   }
   if (szSmall == 2 && szBig == 4) {
      return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
   }
   if (szSmall == 1 && szBig == 8 && !signd) {
      return unop(Iop_8Uto64, src);
   }
   if (szSmall == 1 && szBig == 8 && signd) {
      return unop(Iop_8Sto64, src);
   }
   if (szSmall == 2 && szBig == 8 && !signd) {
      return unop(Iop_16Uto64, src);
   }
   if (szSmall == 2 && szBig == 8 && signd) {
      return unop(Iop_16Sto64, src);
   }
   vpanic("doScalarWidening(amd64)");
}
static
void putGuarded ( Int gstOffB, IRExpr* guard, IRExpr* value )
{
   IRType ty = typeOfIRExpr(irsb->tyenv, value);
   stmt( IRStmt_Put(gstOffB,
                    IRExpr_ITE(guard, value, IRExpr_Get(gstOffB, ty))) );
}
/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

/* Bomb out if we can't handle something. */
__attribute__ ((noreturn))
static void unimplemented ( const HChar* str )
{
   vex_printf("amd64toIR: unimplemented feature\n");
   vex_printf("%s\n", str);
   vpanic("unimplemented");
}

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)
/*------------------------------------------------------------*/
/*--- Offsets of various parts of the amd64 guest state.  ---*/
/*------------------------------------------------------------*/

#define OFFB_RAX       offsetof(VexGuestAMD64State,guest_RAX)
#define OFFB_RBX       offsetof(VexGuestAMD64State,guest_RBX)
#define OFFB_RCX       offsetof(VexGuestAMD64State,guest_RCX)
#define OFFB_RDX       offsetof(VexGuestAMD64State,guest_RDX)
#define OFFB_RSP       offsetof(VexGuestAMD64State,guest_RSP)
#define OFFB_RBP       offsetof(VexGuestAMD64State,guest_RBP)
#define OFFB_RSI       offsetof(VexGuestAMD64State,guest_RSI)
#define OFFB_RDI       offsetof(VexGuestAMD64State,guest_RDI)
#define OFFB_R8        offsetof(VexGuestAMD64State,guest_R8)
#define OFFB_R9        offsetof(VexGuestAMD64State,guest_R9)
#define OFFB_R10       offsetof(VexGuestAMD64State,guest_R10)
#define OFFB_R11       offsetof(VexGuestAMD64State,guest_R11)
#define OFFB_R12       offsetof(VexGuestAMD64State,guest_R12)
#define OFFB_R13       offsetof(VexGuestAMD64State,guest_R13)
#define OFFB_R14       offsetof(VexGuestAMD64State,guest_R14)
#define OFFB_R15       offsetof(VexGuestAMD64State,guest_R15)

#define OFFB_RIP       offsetof(VexGuestAMD64State,guest_RIP)

#define OFFB_FS_CONST  offsetof(VexGuestAMD64State,guest_FS_CONST)
#define OFFB_GS_CONST  offsetof(VexGuestAMD64State,guest_GS_CONST)

#define OFFB_CC_OP     offsetof(VexGuestAMD64State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestAMD64State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestAMD64State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestAMD64State,guest_CC_NDEP)

#define OFFB_FPREGS    offsetof(VexGuestAMD64State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestAMD64State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestAMD64State,guest_DFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestAMD64State,guest_ACFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestAMD64State,guest_IDFLAG)
#define OFFB_FTOP      offsetof(VexGuestAMD64State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestAMD64State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestAMD64State,guest_FPROUND)

#define OFFB_SSEROUND  offsetof(VexGuestAMD64State,guest_SSEROUND)
#define OFFB_YMM0      offsetof(VexGuestAMD64State,guest_YMM0)
#define OFFB_YMM1      offsetof(VexGuestAMD64State,guest_YMM1)
#define OFFB_YMM2      offsetof(VexGuestAMD64State,guest_YMM2)
#define OFFB_YMM3      offsetof(VexGuestAMD64State,guest_YMM3)
#define OFFB_YMM4      offsetof(VexGuestAMD64State,guest_YMM4)
#define OFFB_YMM5      offsetof(VexGuestAMD64State,guest_YMM5)
#define OFFB_YMM6      offsetof(VexGuestAMD64State,guest_YMM6)
#define OFFB_YMM7      offsetof(VexGuestAMD64State,guest_YMM7)
#define OFFB_YMM8      offsetof(VexGuestAMD64State,guest_YMM8)
#define OFFB_YMM9      offsetof(VexGuestAMD64State,guest_YMM9)
#define OFFB_YMM10     offsetof(VexGuestAMD64State,guest_YMM10)
#define OFFB_YMM11     offsetof(VexGuestAMD64State,guest_YMM11)
#define OFFB_YMM12     offsetof(VexGuestAMD64State,guest_YMM12)
#define OFFB_YMM13     offsetof(VexGuestAMD64State,guest_YMM13)
#define OFFB_YMM14     offsetof(VexGuestAMD64State,guest_YMM14)
#define OFFB_YMM15     offsetof(VexGuestAMD64State,guest_YMM15)
#define OFFB_YMM16     offsetof(VexGuestAMD64State,guest_YMM16)

#define OFFB_EMNOTE    offsetof(VexGuestAMD64State,guest_EMNOTE)
#define OFFB_CMSTART   offsetof(VexGuestAMD64State,guest_CMSTART)
#define OFFB_CMLEN     offsetof(VexGuestAMD64State,guest_CMLEN)

#define OFFB_NRADDR    offsetof(VexGuestAMD64State,guest_NRADDR)
/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- amd64 insn stream.                                   ---*/
/*------------------------------------------------------------*/

/* This is the AMD64 register encoding -- integer regs. */
#define R_RAX 0
#define R_RCX 1
#define R_RDX 2
#define R_RBX 3
#define R_RSP 4
#define R_RBP 5
#define R_RSI 6
#define R_RDI 7
#define R_R8  8
#define R_R9  9
#define R_R10 10
#define R_R11 11
#define R_R12 12
#define R_R13 13
#define R_R14 14
#define R_R15 15

/* This is the Intel register encoding -- segment regs. */
#define R_ES 0
#define R_CS 1
#define R_SS 2
#define R_DS 3
#define R_FS 4
#define R_GS 5

/* Various simple conversions */
static ULong extend_s_8to64 ( UChar x )
{
   return (ULong)((Long)(((ULong)x) << 56) >> 56);
}

static ULong extend_s_16to64 ( UShort x )
{
   return (ULong)((Long)(((ULong)x) << 48) >> 48);
}

static ULong extend_s_32to64 ( UInt x )
{
   return (ULong)((Long)(((ULong)x) << 32) >> 32);
}

/* Figure out whether the mod and rm parts of a modRM byte refer to a
   register or memory.  If so, the byte will have the form 11XXXYYY,
   where YYY is the register number. */
static Bool epartIsReg ( UChar mod_reg_rm )
{
   return toBool(0xC0 == (mod_reg_rm & 0xC0));
}

/* Extract the 'g' field from a modRM byte.  This only produces 3
   bits, which is not a complete register number.  You should avoid
   this function if at all possible. */
static Int gregLO3ofRM ( UChar mod_reg_rm )
{
   return (Int)( (mod_reg_rm >> 3) & 7 );
}

/* Ditto the 'e' field of a modRM byte. */
static Int eregLO3ofRM ( UChar mod_reg_rm )
{
   return (Int)(mod_reg_rm & 0x7);
}
524 static inline UChar
getUChar ( Long delta
)
526 UChar v
= guest_code
[delta
+0];
530 static UInt
getUDisp16 ( Long delta
)
532 UInt v
= guest_code
[delta
+1]; v
<<= 8;
533 v
|= guest_code
[delta
+0];
537 //.. static UInt getUDisp ( Int size, Long delta )
540 //.. case 4: return getUDisp32(delta);
541 //.. case 2: return getUDisp16(delta);
542 //.. case 1: return getUChar(delta);
543 //.. default: vpanic("getUDisp(x86)");
545 //.. return 0; /*notreached*/
/* Get a byte value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp8 ( Long delta )
{
   return extend_s_8to64( guest_code[delta] );
}

/* Get a 16-bit value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp16 ( Long delta )
{
   UInt v = guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return extend_s_16to64( (UShort)v );
}

/* Get a 32-bit value out of the insn stream and sign-extend to 64
   bits. */
static Long getSDisp32 ( Long delta )
{
   UInt v = guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return extend_s_32to64( v );
}

/* Get a 64-bit value out of the insn stream. */
static Long getDisp64 ( Long delta )
{
   ULong v = 0;
   v |= guest_code[delta+7]; v <<= 8;
   v |= guest_code[delta+6]; v <<= 8;
   v |= guest_code[delta+5]; v <<= 8;
   v |= guest_code[delta+4]; v <<= 8;
   v |= guest_code[delta+3]; v <<= 8;
   v |= guest_code[delta+2]; v <<= 8;
   v |= guest_code[delta+1]; v <<= 8;
   v |= guest_code[delta+0];
   return v;
}
/* Note: because AMD64 doesn't allow 64-bit literals, it is an error
   if this is called with size==8.  Should not happen. */
static Long getSDisp ( Int size, Long delta )
{
   switch (size) {
      case 4: return getSDisp32(delta);
      case 2: return getSDisp16(delta);
      case 1: return getSDisp8(delta);
      default: vpanic("getSDisp(amd64)");
   }
}

static ULong mkSizeMask ( Int sz )
{
   switch (sz) {
      case 1: return 0x00000000000000FFULL;
      case 2: return 0x000000000000FFFFULL;
      case 4: return 0x00000000FFFFFFFFULL;
      case 8: return 0xFFFFFFFFFFFFFFFFULL;
      default: vpanic("mkSzMask(amd64)");
   }
}

static Int imin ( Int a, Int b )
{
   return (a < b) ? a : b;
}

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: vex_printf("\nszToITy(%d)\n", n);
               vpanic("szToITy(amd64)");
   }
}
/*------------------------------------------------------------*/
/*--- For dealing with prefixes.                           ---*/
/*------------------------------------------------------------*/

/* The idea is to pass around an int holding a bitmask summarising
   info from the prefixes seen on the current instruction, including
   info from the REX byte.  This info is used in various places, but
   most especially when making sense of register fields in
   instructions.

   The top 8 bits of the prefix are 0x55, just as a hacky way to
   ensure it really is a valid prefix.

   Things you can safely assume about a well-formed prefix:
   * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
   * if REX is not present then REXW,REXR,REXX,REXB will read
     as zero.
   * F2 and F3 will not both be 1.
*/

typedef UInt Prefix;

#define PFX_ASO    (1<<0)    /* address-size override present (0x67) */
#define PFX_66     (1<<1)    /* operand-size override-to-16 present (0x66) */
#define PFX_REX    (1<<2)    /* REX byte present (0x40 to 0x4F) */
#define PFX_REXW   (1<<3)    /* REX W bit, if REX present, else 0 */
#define PFX_REXR   (1<<4)    /* REX R bit, if REX present, else 0 */
#define PFX_REXX   (1<<5)    /* REX X bit, if REX present, else 0 */
#define PFX_REXB   (1<<6)    /* REX B bit, if REX present, else 0 */
#define PFX_LOCK   (1<<7)    /* bus LOCK prefix present (0xF0) */
#define PFX_F2     (1<<8)    /* REPNE/REPNZ prefix present (0xF2) */
#define PFX_F3     (1<<9)    /* REP/REPE/REPZ prefix present (0xF3) */
#define PFX_CS     (1<<10)   /* CS segment prefix present (0x2E) */
#define PFX_DS     (1<<11)   /* DS segment prefix present (0x3E) */
#define PFX_ES     (1<<12)   /* ES segment prefix present (0x26) */
#define PFX_FS     (1<<13)   /* FS segment prefix present (0x64) */
#define PFX_GS     (1<<14)   /* GS segment prefix present (0x65) */
#define PFX_SS     (1<<15)   /* SS segment prefix present (0x36) */
#define PFX_VEX    (1<<16)   /* VEX prefix present (0xC4 or 0xC5) */
#define PFX_VEXL   (1<<17)   /* VEX L bit, if VEX present, else 0 */
/* The extra register field VEX.vvvv is encoded (after not-ing it) as
   PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit
   positions. */
#define PFX_VEXnV0 (1<<18)   /* ~VEX vvvv[0], if VEX present, else 0 */
#define PFX_VEXnV1 (1<<19)   /* ~VEX vvvv[1], if VEX present, else 0 */
#define PFX_VEXnV2 (1<<20)   /* ~VEX vvvv[2], if VEX present, else 0 */
#define PFX_VEXnV3 (1<<21)   /* ~VEX vvvv[3], if VEX present, else 0 */

#define PFX_EMPTY 0x55000000
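/* For example (a sketch): an instruction carrying both a REX.W prefix
   and an operand-size override would be summarised as

      Prefix pfx = PFX_EMPTY | PFX_REX | PFX_REXW | PFX_66;

   which IS_VALID_PFX (below) accepts, since the top byte is still 0x55. */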
static Bool IS_VALID_PFX ( Prefix pfx ) {
   return toBool((pfx & 0xFF000000) == PFX_EMPTY);
}

static Bool haveREX ( Prefix pfx ) {
   return toBool(pfx & PFX_REX);
}

static Int getRexW ( Prefix pfx ) {
   return (pfx & PFX_REXW) ? 1 : 0;
}
static Int getRexR ( Prefix pfx ) {
   return (pfx & PFX_REXR) ? 1 : 0;
}
static Int getRexX ( Prefix pfx ) {
   return (pfx & PFX_REXX) ? 1 : 0;
}
static Int getRexB ( Prefix pfx ) {
   return (pfx & PFX_REXB) ? 1 : 0;
}

/* Check a prefix doesn't have F2 or F3 set in it, since usually that
   completely changes what instruction it really is. */
static Bool haveF2orF3 ( Prefix pfx ) {
   return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
}
static Bool haveF2andF3 ( Prefix pfx ) {
   return toBool((pfx & (PFX_F2|PFX_F3)) == (PFX_F2|PFX_F3));
}
static Bool haveF2 ( Prefix pfx ) {
   return toBool((pfx & PFX_F2) > 0);
}
static Bool haveF3 ( Prefix pfx ) {
   return toBool((pfx & PFX_F3) > 0);
}

static Bool have66 ( Prefix pfx ) {
   return toBool((pfx & PFX_66) > 0);
}
static Bool haveASO ( Prefix pfx ) {
   return toBool((pfx & PFX_ASO) > 0);
}
static Bool haveLOCK ( Prefix pfx ) {
   return toBool((pfx & PFX_LOCK) > 0);
}
/* Return True iff pfx has 66 set and F2 and F3 clear */
static Bool have66noF2noF3 ( Prefix pfx )
{
   return toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
}

/* Return True iff pfx has F2 set and 66 and F3 clear */
static Bool haveF2no66noF3 ( Prefix pfx )
{
   return toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
}

/* Return True iff pfx has F3 set and 66 and F2 clear */
static Bool haveF3no66noF2 ( Prefix pfx )
{
   return toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
}

/* Return True iff pfx has F3 set and F2 clear */
static Bool haveF3noF2 ( Prefix pfx )
{
   return toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3);
}

/* Return True iff pfx has F2 set and F3 clear */
static Bool haveF2noF3 ( Prefix pfx )
{
   return toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2);
}

/* Return True iff pfx has 66, F2 and F3 clear */
static Bool haveNo66noF2noF3 ( Prefix pfx )
{
   return toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
}

/* Return True iff pfx has any of 66, F2 and F3 set */
static Bool have66orF2orF3 ( Prefix pfx )
{
   return toBool( ! haveNo66noF2noF3(pfx) );
}

/* Return True iff pfx has 66 or F3 set */
static Bool have66orF3 ( Prefix pfx )
{
   return toBool((pfx & (PFX_66|PFX_F3)) > 0);
}

/* Clear all the segment-override bits in a prefix. */
static Prefix clearSegBits ( Prefix p )
{
   return p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
}
/* Get the (inverted, hence back to "normal") VEX.vvvv field. */
static UInt getVexNvvvv ( Prefix pfx ) {
   UInt r = (UInt)pfx;
   r /= (UInt)PFX_VEXnV0; /* pray this turns into a shift */
   return r & 0xF;
}

static Bool haveVEX ( Prefix pfx ) {
   return toBool(pfx & PFX_VEX);
}

static Int getVexL ( Prefix pfx ) {
   return (pfx & PFX_VEXL) ? 1 : 0;
}
/*------------------------------------------------------------*/
/*--- For dealing with escapes                             ---*/
/*------------------------------------------------------------*/

/* Escapes come after the prefixes, but before the primary opcode
   byte.  They escape the primary opcode byte into a bigger space.
   The 0xF0000000 isn't significant, except so as to make it not
   overlap valid Prefix values, for sanity checking.
*/

typedef
   enum {
      ESC_NONE=0xF0000000, // none
      ESC_0F,              // 0F
      ESC_0F38,            // 0F 38
      ESC_0F3A             // 0F 3A
   }
   Escape;
/*------------------------------------------------------------*/
/*--- For dealing with integer registers                   ---*/
/*------------------------------------------------------------*/

/* This is somewhat complex.  The rules are:

   For 64, 32 and 16 bit register references, the e or g fields in the
   modrm bytes supply the low 3 bits of the register number.  The
   fourth (most-significant) bit of the register number is supplied by
   the REX byte, if it is present; else that bit is taken to be zero.

   The REX.R bit supplies the high bit corresponding to the g register
   field, and the REX.B bit supplies the high bit corresponding to the
   e register field (when the mod part of modrm indicates that modrm's
   e component refers to a register and not to memory).

   The REX.X bit supplies a high register bit for certain registers
   in SIB address modes, and is generally rarely used.

   For 8 bit register references, the presence of the REX byte itself
   has significance.  If there is no REX present, then the 3-bit
   number extracted from the modrm e or g field is treated as an index
   into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
   old x86 encoding scheme.

   But if there is a REX present, the register reference is
   interpreted in the same way as for 64/32/16-bit references: a high
   bit is extracted from REX, giving a 4-bit number, and the denoted
   register is the lowest 8 bits of the 16 integer registers denoted
   by the number.  In particular, values 4 through 7 of this sequence
   do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
   %rsp %rbp %rsi %rdi.

   The REX.W bit has no bearing at all on register numbers.  Instead
   its presence indicates that the operand size is to be overridden
   from its default value (32 bits) to 64 bits instead.  This is in
   the same fashion that an 0x66 prefix indicates the operand size is
   to be overridden from 32 bits down to 16 bits.  When both REX.W and
   0x66 are present there is a conflict, and REX.W takes precedence.

   Rather than try to handle this complexity using a single huge
   function, several smaller ones are provided.  The aim is to make it
   as difficult as possible to screw up register decoding in a subtle
   and hard-to-track-down way.

   Because these routines fish around in the host's memory (that is,
   in the guest state area) for sub-parts of guest registers, their
   correctness depends on the host's endianness.  So far these
   routines only work for little-endian hosts.  Those for which
   endianness is important have assertions to ensure sanity.
*/
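/* A worked example (a sketch): with modrm byte 0xD8 (binary 11 011 000)
   and a REX byte with R=1, the g field is 3 and gregOfRexRM below gives
   3+8 = 11, i.e. %r11 for a 64-bit access.  With no REX present and
   sz == 1, the same 3-bit value 3 instead denotes %bl under the
   irregular (old x86) scheme. */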
/* About the simplest question you can ask: where do the 64-bit
   integer registers live (in the guest state) ? */

static Int integerGuestReg64Offset ( UInt reg )
{
   switch (reg) {
      case R_RAX: return OFFB_RAX;
      case R_RCX: return OFFB_RCX;
      case R_RDX: return OFFB_RDX;
      case R_RBX: return OFFB_RBX;
      case R_RSP: return OFFB_RSP;
      case R_RBP: return OFFB_RBP;
      case R_RSI: return OFFB_RSI;
      case R_RDI: return OFFB_RDI;
      case R_R8:  return OFFB_R8;
      case R_R9:  return OFFB_R9;
      case R_R10: return OFFB_R10;
      case R_R11: return OFFB_R11;
      case R_R12: return OFFB_R12;
      case R_R13: return OFFB_R13;
      case R_R14: return OFFB_R14;
      case R_R15: return OFFB_R15;
      default: vpanic("integerGuestReg64Offset(amd64)");
   }
}
/* Produce the name of an integer register, for printing purposes.
   reg is a number in the range 0 .. 15 that has been generated from a
   3-bit reg-field number and a REX extension bit.  irregular denotes
   the case where sz==1 and no REX byte is present. */

static
const HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
{
   static const HChar* ireg64_names[16]
     = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
         "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
   static const HChar* ireg32_names[16]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
         "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
   static const HChar* ireg16_names[16]
     = { "%ax",  "%cx",  "%dx",  "%bx",  "%sp",  "%bp",  "%si",  "%di",
         "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
   static const HChar* ireg8_names[16]
     = { "%al",  "%cl",  "%dl",  "%bl",  "%spl", "%bpl", "%sil", "%dil",
         "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
   static const HChar* ireg8_irregular[8]
     = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" };

   vassert(reg < 16);
   if (sz == 1) {
      if (reg >= 8)
         vassert(irregular == False);
   }

   switch (sz) {
      case 8: return ireg64_names[reg];
      case 4: return ireg32_names[reg];
      case 2: return ireg16_names[reg];
      case 1: if (irregular) {
                 return ireg8_irregular[reg];
              } else {
                 return ireg8_names[reg];
              }
      default: vpanic("nameIReg(amd64)");
   }
}
/* Using the same argument conventions as nameIReg, produce the
   guest state offset of an integer register. */

static
Int offsetIReg ( Int sz, UInt reg, Bool irregular )
{
   vassert(reg < 16);
   if (sz == 1) {
      if (reg >= 8)
         vassert(irregular == False);
   }

   /* Deal with irregular case -- sz==1 and no REX present */
   if (sz == 1 && irregular) {
      switch (reg) {
         case R_RSP: return 1+ OFFB_RAX;
         case R_RBP: return 1+ OFFB_RCX;
         case R_RSI: return 1+ OFFB_RDX;
         case R_RDI: return 1+ OFFB_RBX;
         default:    break; /* use the normal case */
      }
   }

   /* Normal case */
   return integerGuestReg64Offset(reg);
}
/* Read the %CL register :: Ity_I8, for shift/rotate operations. */

static IRExpr* getIRegCL ( void )
{
   vassert(host_endness == VexEndnessLE);
   return IRExpr_Get( OFFB_RCX, Ity_I8 );
}

/* Write to the %AH register. */

static void putIRegAH ( IRExpr* e )
{
   vassert(host_endness == VexEndnessLE);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
   stmt( IRStmt_Put( OFFB_RAX+1, e ) );
}
/* Read/write various widths of %RAX, as it has various
   special-purpose uses. */

static const HChar* nameIRegRAX ( Int sz )
{
   switch (sz) {
      case 1: return "%al";
      case 2: return "%ax";
      case 4: return "%eax";
      case 8: return "%rax";
      default: vpanic("nameIRegRAX(amd64)");
   }
}

static IRExpr* getIRegRAX ( Int sz )
{
   vassert(host_endness == VexEndnessLE);
   switch (sz) {
      case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 );
      case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 );
      case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
      case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
      default: vpanic("getIRegRAX(amd64)");
   }
}

static void putIRegRAX ( Int sz, IRExpr* e )
{
   IRType ty = typeOfIRExpr(irsb->tyenv, e);
   vassert(host_endness == VexEndnessLE);
   switch (sz) {
      case 8: vassert(ty == Ity_I64);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      case 4: vassert(ty == Ity_I32);
              stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
              break;
      case 2: vassert(ty == Ity_I16);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      case 1: vassert(ty == Ity_I8);
              stmt( IRStmt_Put( OFFB_RAX, e ));
              break;
      default: vpanic("putIRegRAX(amd64)");
   }
}
/* Read/write various widths of %RDX, as it has various
   special-purpose uses. */

static const HChar* nameIRegRDX ( Int sz )
{
   switch (sz) {
      case 1: return "%dl";
      case 2: return "%dx";
      case 4: return "%edx";
      case 8: return "%rdx";
      default: vpanic("nameIRegRDX(amd64)");
   }
}

static IRExpr* getIRegRDX ( Int sz )
{
   vassert(host_endness == VexEndnessLE);
   switch (sz) {
      case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 );
      case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 );
      case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
      case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
      default: vpanic("getIRegRDX(amd64)");
   }
}

static void putIRegRDX ( Int sz, IRExpr* e )
{
   vassert(host_endness == VexEndnessLE);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   switch (sz) {
      case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
              break;
      case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
              break;
      default: vpanic("putIRegRDX(amd64)");
   }
}
/* Simplistic functions to deal with the integer registers as a
   straightforward bank of 16 64-bit regs. */

static IRExpr* getIReg64 ( UInt regno )
{
   return IRExpr_Get( integerGuestReg64Offset(regno),
                      Ity_I64 );
}

static void putIReg64 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
}

static const HChar* nameIReg64 ( UInt regno )
{
   return nameIReg( 8, regno, False );
}

/* Simplistic functions to deal with the lower halves of integer
   registers as a straightforward bank of 16 32-bit regs. */

static IRExpr* getIReg32 ( UInt regno )
{
   vassert(host_endness == VexEndnessLE);
   return unop(Iop_64to32,
               IRExpr_Get( integerGuestReg64Offset(regno),
                           Ity_I64 ));
}

static void putIReg32 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno),
                     unop(Iop_32Uto64,e) ) );
}

static const HChar* nameIReg32 ( UInt regno )
{
   return nameIReg( 4, regno, False );
}

/* Simplistic functions to deal with the lower quarters of integer
   registers as a straightforward bank of 16 16-bit regs. */

static IRExpr* getIReg16 ( UInt regno )
{
   vassert(host_endness == VexEndnessLE);
   return IRExpr_Get( integerGuestReg64Offset(regno),
                      Ity_I16 );
}

static void putIReg16 ( UInt regno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
   stmt( IRStmt_Put( integerGuestReg64Offset(regno),
                     unop(Iop_16Uto64,e) ) );
}

static const HChar* nameIReg16 ( UInt regno )
{
   return nameIReg( 2, regno, False );
}
/* Sometimes what we know is a 3-bit register number, a REX byte, and
   which field of the REX byte is to be used to extend to a 4-bit
   number.  These functions cater for that situation.
*/
static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return getIReg64( lo3bits | (getRexX(pfx) << 3) );
}

static const HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
}
static const HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   return nameIReg( sz, lo3bits | (getRexB(pfx) << 3),
                        toBool(sz==1 && !haveREX(pfx)) );
}

static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get(
                     offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                                     False/*!irregular*/ ),
                     szToITy(sz)
                  ));
   } else {
      return IRExpr_Get(
                offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                                toBool(sz==1 && !haveREX(pfx)) ),
                szToITy(sz) );
   }
}

static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
{
   vassert(lo3bits < 8);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
   stmt( IRStmt_Put(
            offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
                            toBool(sz==1 && !haveREX(pfx)) ),
            sz==4 ? unop(Iop_32Uto64,e) : e
   ));
}
/* Functions for getting register numbers from modrm bytes and REX
   when we don't have to consider the complexities of integer subreg
   accesses.
*/
/* Extract the g reg field from a modRM byte, and augment it using the
   REX.R bit from the supplied REX byte.  The R bit usually is
   associated with the g register field.
*/
static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
{
   Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
   reg += (pfx & PFX_REXR) ? 8 : 0;
   return reg;
}

/* Extract the e reg field from a modRM byte, and augment it using the
   REX.B bit from the supplied REX byte.  The B bit usually is
   associated with the e register field (when modrm indicates e is a
   register, that is).
*/
static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
{
   Int rm;
   vassert(epartIsReg(mod_reg_rm));
   rm = (Int)(mod_reg_rm & 0x7);
   rm += (pfx & PFX_REXB) ? 8 : 0;
   return rm;
}
/* General functions for dealing with integer register access. */

/* Produce the guest state offset for a reference to the 'g' register
   field in a modrm byte, taking into account REX (or its absence),
   and the size of the access.
*/
static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(host_endness == VexEndnessLE);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   reg = gregOfRexRM( pfx, mod_reg_rm );
   return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
}
static
IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
                         szToITy(sz) );
   }
}

static
void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
}

static
const HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm),
                        toBool(sz==1 && !haveREX(pfx)) );
}
static
IRExpr* getIRegV ( Int sz, Prefix pfx )
{
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
                         szToITy(sz) );
   }
}

static
void putIRegV ( Int sz, Prefix pfx, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIReg( sz, getVexNvvvv(pfx), False ), e ) );
}

static
const HChar* nameIRegV ( Int sz, Prefix pfx )
{
   return nameIReg( sz, getVexNvvvv(pfx), False );
}
/* Produce the guest state offset for a reference to the 'e' register
   field in a modrm byte, taking into account REX (or its absence),
   and the size of the access.  eregOfRexRM will assert if mod_reg_rm
   denotes a memory access rather than a register access.
*/
static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   UInt reg;
   vassert(host_endness == VexEndnessLE);
   vassert(IS_VALID_PFX(pfx));
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   reg = eregOfRexRM( pfx, mod_reg_rm );
   return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
}

static
IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   if (sz == 4) {
      sz = 8;
      return unop(Iop_64to32,
                  IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
                              szToITy(sz) ));
   } else {
      return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
                         szToITy(sz) );
   }
}

static
void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
   if (sz == 4) {
      e = unop(Iop_32Uto64,e);
   }
   stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
}

static
const HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
   return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm),
                        toBool(sz==1 && !haveREX(pfx)) );
}
/*------------------------------------------------------------*/
/*--- For dealing with XMM registers                       ---*/
/*------------------------------------------------------------*/

static Int ymmGuestRegOffset ( UInt ymmreg )
{
   switch (ymmreg) {
      case 0:  return OFFB_YMM0;
      case 1:  return OFFB_YMM1;
      case 2:  return OFFB_YMM2;
      case 3:  return OFFB_YMM3;
      case 4:  return OFFB_YMM4;
      case 5:  return OFFB_YMM5;
      case 6:  return OFFB_YMM6;
      case 7:  return OFFB_YMM7;
      case 8:  return OFFB_YMM8;
      case 9:  return OFFB_YMM9;
      case 10: return OFFB_YMM10;
      case 11: return OFFB_YMM11;
      case 12: return OFFB_YMM12;
      case 13: return OFFB_YMM13;
      case 14: return OFFB_YMM14;
      case 15: return OFFB_YMM15;
      default: vpanic("ymmGuestRegOffset(amd64)");
   }
}

static Int xmmGuestRegOffset ( UInt xmmreg )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   return ymmGuestRegOffset( xmmreg );
}
/* Lanes of vector registers are always numbered from zero being the
   least significant lane (rightmost in the register). */

static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 8);
   return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
}

static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 4);
   return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
}

static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 2);
   return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
}

static Int ymmGuestRegLane128offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 2);
   return ymmGuestRegOffset( ymmreg ) + 16 * laneno;
}

static Int ymmGuestRegLane64offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 4);
   return ymmGuestRegOffset( ymmreg ) + 8 * laneno;
}

static Int ymmGuestRegLane32offset ( UInt ymmreg, Int laneno )
{
   /* Correct for little-endian host only. */
   vassert(host_endness == VexEndnessLE);
   vassert(laneno >= 0 && laneno < 8);
   return ymmGuestRegOffset( ymmreg ) + 4 * laneno;
}
static IRExpr* getXMMReg ( UInt xmmreg )
{
   return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
}

static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
}

static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
}

static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
}

static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
}

static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
{
   return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
}

static void putXMMReg ( UInt xmmreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
}

static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}

static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}
static IRExpr* getYMMReg ( UInt xmmreg )
{
   return IRExpr_Get( ymmGuestRegOffset(xmmreg), Ity_V256 );
}

static IRExpr* getYMMRegLane128 ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane128offset(ymmreg,laneno), Ity_V128 );
}

static IRExpr* getYMMRegLane64F ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_F64 );
}

static IRExpr* getYMMRegLane64 ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_I64 );
}

static IRExpr* getYMMRegLane32F ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_F32 );
}

static IRExpr* getYMMRegLane32 ( UInt ymmreg, Int laneno )
{
   return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_I32 );
}

static void putYMMReg ( UInt ymmreg, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V256);
   stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg), e ) );
}

static void putYMMRegLane128 ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
   stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane64F ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
   stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane64 ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane32F ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
   stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
}

static void putYMMRegLane32 ( UInt ymmreg, Int laneno, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
   stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
}
static IRExpr* mkV128 ( UShort mask )
{
   return IRExpr_Const(IRConst_V128(mask));
}

/* Write the low half of a YMM reg and zero out the upper half. */
static void putYMMRegLoAndZU ( UInt ymmreg, IRExpr* e )
{
   putYMMRegLane128( ymmreg, 0, e );
   putYMMRegLane128( ymmreg, 1, mkV128(0) );
}

static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
{
   vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
   vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
   return unop(Iop_64to1,
               binop(Iop_And64,
                     unop(Iop_1Uto64,x),
                     unop(Iop_1Uto64,y)));
}
/* Generate a compare-and-swap operation, operating on memory at
   'addr'.  The expected value is 'expVal' and the new value is
   'newVal'.  If the operation fails, then transfer control (with a
   no-redir jump (XXX no -- see comment at top of this file)) to
   'restart_point', which is presumably the address of the guest
   instruction again -- retrying, essentially. */
static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
                    Addr64 restart_point )
{
   IRCAS* cas;
   IRType tyE    = typeOfIRExpr(irsb->tyenv, expVal);
   IRType tyN    = typeOfIRExpr(irsb->tyenv, newVal);
   IRTemp oldTmp = newTemp(tyE);
   IRTemp expTmp = newTemp(tyE);
   vassert(tyE == tyN);
   vassert(tyE == Ity_I64 || tyE == Ity_I32
           || tyE == Ity_I16 || tyE == Ity_I8);
   assign(expTmp, expVal);
   cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
                  NULL, mkexpr(expTmp), NULL, newVal );
   stmt( IRStmt_CAS(cas) );
   stmt( IRStmt_Exit(
            binop( mkSizedOp(tyE,Iop_CasCmpNE8),
                   mkexpr(oldTmp), mkexpr(expTmp) ),
            Ijk_Boring, /*Ijk_NoRedir*/
            IRConst_U64( restart_point ),
            OFFB_RIP
         ));
}
/*------------------------------------------------------------*/
/*--- Helpers for %rflags.                                 ---*/
/*------------------------------------------------------------*/

/* -------------- Evaluating the flags-thunk. -------------- */

/* Build IR to calculate all the eflags from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64. */
static IRExpr* mk_amd64g_calculate_rflags_all ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I1. */
static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
{
   IRExpr** args
      = mkIRExprVec_5( mkU64(cond),
                       IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_condition", &amd64g_calculate_condition,
           args
        );
   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
   return unop(Iop_64to1, call);
}
/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression :: Ity_I64. */
static IRExpr* mk_amd64g_calculate_rflags_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
/* -------------- Building the flags-thunk. -------------- */

/* The machinery in this section builds the flag-thunk following a
   flag-setting operation.  Hence the various setFlags_* functions.
*/

static Bool isAddSub ( IROp op8 )
{
   return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
}

static Bool isLogic ( IROp op8 )
{
   return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
}
/* U-widen 1/8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      case Ity_I1:  return unop(Iop_1Uto64, e);
      default: vpanic("widenUto64");
   }
}
/* S-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenSto64 ( IRExpr* e )
{
   switch (typeOfIRExpr(irsb->tyenv,e)) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Sto64, e);
      case Ity_I16: return unop(Iop_16Sto64, e);
      case Ity_I8:  return unop(Iop_8Sto64, e);
      default: vpanic("widenSto64");
   }
}
/* Narrow 8/16/32/64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
{
   IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
   if (src_ty == dst_ty)
      return e;
   if (src_ty == Ity_I32 && dst_ty == Ity_I16)
      return unop(Iop_32to16, e);
   if (src_ty == Ity_I32 && dst_ty == Ity_I8)
      return unop(Iop_32to8, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I32)
      return unop(Iop_64to32, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I16)
      return unop(Iop_64to16, e);
   if (src_ty == Ity_I64 && dst_ty == Ity_I8)
      return unop(Iop_64to8, e);

   vex_printf("\nsrc, dst tys are: ");
   ppIRType(src_ty);
   vex_printf(", ");
   ppIRType(dst_ty);
   vex_printf("\n");
   vpanic("narrowTo(amd64)");
}
/* Set the flags thunk OP, DEP1 and DEP2 fields.  The supplied op is
   auto-sized up to the real op. */

static
void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }
   switch (op8) {
      case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
      case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1_DEP2(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
}
/* Set the OP and DEP1 fields only, and write zero to DEP2. */

static
void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }
   switch (op8) {
      case Iop_Or8:
      case Iop_And8:
      case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
      default:       ppIROp(op8);
                     vpanic("setFlags_DEP1(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
}
/* For shift operations, we put in the result and the undershifted
   result.  Except if the shift amount is zero, the thunk is left
   unchanged. */

static void setFlags_DEP1_DEP2_shift ( IROp    op64,
                                        IRTemp  res,
                                        IRTemp  resUS,
                                        IRType  ty,
                                        IRTemp  guard )
{
   Int ccOp = 0;
   switch (ty) {
      case Ity_I8:  ccOp = 0; break;
      case Ity_I16: ccOp = 1; break;
      case Ity_I32: ccOp = 2; break;
      case Ity_I64: ccOp = 3; break;
      default: vassert(0);
   }

   /* Both kinds of right shifts are handled by the same thunk
      operation. */
   switch (op64) {
      case Iop_Shr64:
      case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
      case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
      default:        ppIROp(op64);
                      vpanic("setFlags_DEP1_DEP2_shift(amd64)");
   }

   /* guard :: Ity_I8.  We need to convert it to I1. */
   IRTemp guardB = newTemp(Ity_I1);
   assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) );

   /* DEP1 contains the result, DEP2 contains the undershifted value. */
   stmt( IRStmt_Put( OFFB_CC_OP,
                     IRExpr_ITE( mkexpr(guardB),
                                 mkU64(ccOp),
                                 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
                     IRExpr_ITE( mkexpr(guardB),
                                 widenUto64(mkexpr(res)),
                                 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2,
                     IRExpr_ITE( mkexpr(guardB),
                                 widenUto64(mkexpr(resUS)),
                                 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
}
/* For the inc/dec case, we store in DEP1 the result value and in NDEP
   the former value of the carry flag, which unfortunately we have to
   compute. */

static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
{
   Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;

   switch (ty) {
      case Ity_I8:  ccOp += 0; break;
      case Ity_I16: ccOp += 1; break;
      case Ity_I32: ccOp += 2; break;
      case Ity_I64: ccOp += 3; break;
      default: vassert(0);
   }

   /* This has to come first, because calculating the C flag
      may require reading all four thunk fields. */
   stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(ccOp)) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
}
/* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
   two arguments. */

static
void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
{
   switch (ty) {
      case Ity_I8:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) );
         break;
      case Ity_I16:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) );
         break;
      case Ity_I32:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) );
         break;
      case Ity_I64:
         stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) );
         break;
      default:
         vpanic("setFlags_MUL(amd64)");
   }
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
}
/* -------------- Condition codes. -------------- */

/* Condition codes, using the AMD encoding. */

static const HChar* name_AMD64Condcode ( AMD64Condcode cond )
{
   switch (cond) {
      case AMD64CondO:      return "o";
      case AMD64CondNO:     return "no";
      case AMD64CondB:      return "b";
      case AMD64CondNB:     return "ae"; /*"nb";*/
      case AMD64CondZ:      return "e";  /*"z";*/
      case AMD64CondNZ:     return "ne"; /*"nz";*/
      case AMD64CondBE:     return "be";
      case AMD64CondNBE:    return "a";  /*"nbe";*/
      case AMD64CondS:      return "s";
      case AMD64CondNS:     return "ns";
      case AMD64CondP:      return "p";
      case AMD64CondNP:     return "np";
      case AMD64CondL:      return "l";
      case AMD64CondNL:     return "ge"; /*"nl";*/
      case AMD64CondLE:     return "le";
      case AMD64CondNLE:    return "g";  /*"nle";*/
      case AMD64CondAlways: return "ALWAYS";
      default: vpanic("name_AMD64Condcode");
   }
}
1973 AMD64Condcode
positiveIse_AMD64Condcode ( AMD64Condcode cond
,
1974 /*OUT*/Bool
* needInvert
)
1976 vassert(cond
>= AMD64CondO
&& cond
<= AMD64CondNLE
);
1981 *needInvert
= False
;

/* -------------- Helpers for ADD/SUB with carry. -------------- */

/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
   appropriately.

   Optionally, generate a store for the 'tres' value.  This can either
   be a normal store, or it can be a cas-with-possible-failure style
   store:

   if taddr is IRTemp_INVALID, then no store is generated.

   if taddr is not IRTemp_INVALID, then a store (using taddr as
   the address) is generated:

     if texpVal is IRTemp_INVALID then a normal store is
     generated, and restart_point must be zero (it is irrelevant).

     if texpVal is not IRTemp_INVALID then a cas-style store is
     generated.  texpVal is the expected value, restart_point
     is the restart point if the store fails, and texpVal must
     have the same type as tres.
*/
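
/* Typical call patterns, shown as a sketch only (the temp names here
   are illustrative, not taken from the callers below):

      // register/temp ADC, no store:
      helper_ADC( sz, t_res, t_dst0, t_src,
                  IRTemp_INVALID, IRTemp_INVALID, 0 );

      // ADC to memory under LOCK: the store is done as a
      // compare-and-swap against the originally loaded value, and the
      // instruction is restarted if another thread raced us:
      helper_ADC( sz, t_res, t_dst0, t_src,
                  t_addr, t_dst0/*expected*/, guest_RIP_curr_instr );
*/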
static void helper_ADC ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
{
   ULong thunkOp;
   IRType ty    = szToITy(sz);
   IRTemp oldc  = newTemp(Ity_I64);
   IRTemp oldcn = newTemp(ty);
   IROp   plus  = mkSizedOp(ty, Iop_Add8);
   IROp   xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   switch (sz) {
      case 8:  thunkOp = AMD64G_CC_OP_ADCQ; break;
      case 4:  thunkOp = AMD64G_CC_OP_ADCL; break;
      case 2:  thunkOp = AMD64G_CC_OP_ADCW; break;
      case 1:  thunkOp = AMD64G_CC_OP_ADCB; break;
      default: vassert(0);
   }

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And64,
                       mk_amd64g_calculate_rflags_c(),
                       mkU64(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   assign( tres, binop(plus,
                       binop(plus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}

/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
   appropriately.  As with helper_ADC, possibly generate a store of
   the result -- see comments on helper_ADC for details.
*/
static void helper_SBB ( Int sz,
                         IRTemp tres, IRTemp ta1, IRTemp ta2,
                         /* info about optional store: */
                         IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
{
   ULong thunkOp;
   IRType ty    = szToITy(sz);
   IRTemp oldc  = newTemp(Ity_I64);
   IRTemp oldcn = newTemp(ty);
   IROp   minus = mkSizedOp(ty, Iop_Sub8);
   IROp   xor   = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   switch (sz) {
      case 8:  thunkOp = AMD64G_CC_OP_SBBQ; break;
      case 4:  thunkOp = AMD64G_CC_OP_SBBL; break;
      case 2:  thunkOp = AMD64G_CC_OP_SBBW; break;
      case 1:  thunkOp = AMD64G_CC_OP_SBBB; break;
      default: vassert(0);
   }

   /* oldc = old carry flag, 0 or 1 */
   assign( oldc, binop(Iop_And64,
                       mk_amd64g_calculate_rflags_c(),
                       mkU64(1)) );

   assign( oldcn, narrowTo(ty, mkexpr(oldc)) );

   assign( tres, binop(minus,
                       binop(minus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldcn)) );

   /* Possibly generate a store of 'tres' to 'taddr'.  See comment at
      start of this function. */
   if (taddr != IRTemp_INVALID) {
      if (texpVal == IRTemp_INVALID) {
         vassert(restart_point == 0);
         storeLE( mkexpr(taddr), mkexpr(tres) );
      } else {
         vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
         /* .. and hence 'texpVal' has the same type as 'tres'. */
         casLE( mkexpr(taddr),
                mkexpr(texpVal), mkexpr(tres), restart_point );
      }
   }

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldcn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
}

/* Given ta1, ta2 and tres, compute tres = ADCX(ta1,ta2) or tres = ADOX(ta1,ta2)
   and set flags appropriately.
*/
static void helper_ADCX_ADOX ( Bool isADCX, Int sz,
                               IRTemp tres, IRTemp ta1, IRTemp ta2 )
{
   ULong thunkOp;
   IRType ty       = szToITy(sz);
   IRTemp oldflags = newTemp(Ity_I64);
   IRTemp oldOC    = newTemp(Ity_I64);   // old O or C flag
   IRTemp oldOCn   = newTemp(ty);        // old O or C flag, narrowed
   IROp   plus     = mkSizedOp(ty, Iop_Add8);
   IROp   xor      = mkSizedOp(ty, Iop_Xor8);

   vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);

   switch (sz) {
      case 8:  thunkOp = isADCX ? AMD64G_CC_OP_ADCX64
                                : AMD64G_CC_OP_ADOX64; break;
      case 4:  thunkOp = isADCX ? AMD64G_CC_OP_ADCX32
                                : AMD64G_CC_OP_ADOX32; break;
      default: vassert(0);
   }

   assign( oldflags, mk_amd64g_calculate_rflags_all() );

   /* oldOC = old overflow/carry flag, 0 or 1 */
   assign( oldOC, binop(Iop_And64,
                        binop(Iop_Shr64,
                              mkexpr(oldflags),
                              mkU8(isADCX ? AMD64G_CC_SHIFT_C
                                          : AMD64G_CC_SHIFT_O)),
                        mkU64(1)) );

   assign( oldOCn, narrowTo(ty, mkexpr(oldOC)) );

   assign( tres, binop(plus,
                       binop(plus,mkexpr(ta1),mkexpr(ta2)),
                       mkexpr(oldOCn)) );

   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(thunkOp) ) );
   stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
                                                         mkexpr(oldOCn)) )) );
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldflags) ) );
}

/* -------------- Helpers for disassembly printing. -------------- */

static const HChar* nameGrp1 ( Int opc_aux )
{
   static const HChar* grp1_names[8] 
     = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
   return grp1_names[opc_aux];
}

static const HChar* nameGrp2 ( Int opc_aux )
{
   static const HChar* grp2_names[8] 
     = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
   if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
   return grp2_names[opc_aux];
}

static const HChar* nameGrp4 ( Int opc_aux )
{
   static const HChar* grp4_names[8] 
     = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
   if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
   return grp4_names[opc_aux];
}

static const HChar* nameGrp5 ( Int opc_aux )
{
   static const HChar* grp5_names[8] 
     = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
   if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
   return grp5_names[opc_aux];
}

static const HChar* nameGrp8 ( Int opc_aux )
{
   static const HChar* grp8_names[8] 
     = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
   if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
   return grp8_names[opc_aux];
}

static const HChar* nameSReg ( UInt sreg )
{
   switch (sreg) {
      case R_ES: return "%es";
      case R_CS: return "%cs";
      case R_SS: return "%ss";
      case R_DS: return "%ds";
      case R_FS: return "%fs";
      case R_GS: return "%gs";
      default: vpanic("nameSReg(amd64)");
   }
}

static const HChar* nameMMXReg ( Int mmxreg )
{
   static const HChar* mmx_names[8] 
     = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
   if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
   return mmx_names[mmxreg];
}

static const HChar* nameXMMReg ( Int xmmreg )
{
   static const HChar* xmm_names[16] 
     = { "%xmm0",  "%xmm1",  "%xmm2",  "%xmm3", 
         "%xmm4",  "%xmm5",  "%xmm6",  "%xmm7", 
         "%xmm8",  "%xmm9",  "%xmm10", "%xmm11",
         "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
   if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
   return xmm_names[xmmreg];
}

static const HChar* nameMMXGran ( Int gran )
{
   switch (gran) {
      case 0: return "b";
      case 1: return "w";
      case 2: return "d";
      case 3: return "q";
      default: vpanic("nameMMXGran(amd64,guest)");
   }
}

static HChar nameISize ( Int size )
{
   switch (size) {
      case 8: return 'q';
      case 4: return 'l';
      case 2: return 'w';
      case 1: return 'b';
      default: vpanic("nameISize(amd64)");
   }
}

static const HChar* nameYMMReg ( Int ymmreg )
{
   static const HChar* ymm_names[16] 
     = { "%ymm0",  "%ymm1",  "%ymm2",  "%ymm3", 
         "%ymm4",  "%ymm5",  "%ymm6",  "%ymm7", 
         "%ymm8",  "%ymm9",  "%ymm10", "%ymm11",
         "%ymm12", "%ymm13", "%ymm14", "%ymm15" };
   if (ymmreg < 0 || ymmreg > 15) vpanic("nameYMMReg(amd64)");
   return ymm_names[ymmreg];
}

/*------------------------------------------------------------*/
/*--- JMP helpers                                          ---*/
/*------------------------------------------------------------*/

static void jmp_lit( /*MOD*/DisResult* dres,
                     IRJumpKind kind, Addr64 d64 )
{
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = kind;
   stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) );
}

static void jmp_treg( /*MOD*/DisResult* dres,
                      IRJumpKind kind, IRTemp t )
{
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = kind;
   stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) );
}

static 
void jcc_01 ( /*MOD*/DisResult* dres,
              AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
{
   Bool          invert;
   AMD64Condcode condPos;
   vassert(dres->whatNext    == Dis_Continue);
   vassert(dres->len         == 0);
   vassert(dres->continueAt  == 0);
   vassert(dres->jk_StopHere == Ijk_INVALID);
   dres->whatNext    = Dis_StopHere;
   dres->jk_StopHere = Ijk_Boring;
   condPos = positiveIse_AMD64Condcode ( cond, &invert );
   if (invert) {
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
                         Ijk_Boring,
                         IRConst_U64(d64_false),
                         OFFB_RIP ) );
      stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) );
   } else {
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
                         Ijk_Boring,
                         IRConst_U64(d64_true),
                         OFFB_RIP ) );
      stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) );
   }
}

/* Let new_rsp be the %rsp value after a call/return.  Let nia be the
   guest address of the next instruction to be executed.

   This function generates an AbiHint to say that -128(%rsp)
   .. -1(%rsp) should now be regarded as uninitialised.
*/
static 
void make_redzone_AbiHint ( const VexAbiInfo* vbi,
                            IRTemp new_rsp, IRTemp nia, const HChar* who )
{
   Int szB = vbi->guest_stack_redzone_size;

   /* A bit of a kludge.  Currently the only ABI we've guested AMD64
      for is ELF, so just check szB is the expected 128 value. */
   vassert(szB == 128);

   if (0) vex_printf("AbiHint: %s\n", who);

   vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, nia)     == Ity_I64);

   stmt( IRStmt_AbiHint( 
            binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)), 
            szB,
            mkexpr(nia) ));
}

/*------------------------------------------------------------*/
/*--- Disassembling addressing modes                       ---*/
/*------------------------------------------------------------*/

static 
const HChar* segRegTxt ( Prefix pfx )
{
   if (pfx & PFX_CS) return "%cs:";
   if (pfx & PFX_DS) return "%ds:";
   if (pfx & PFX_ES) return "%es:";
   if (pfx & PFX_FS) return "%fs:";
   if (pfx & PFX_GS) return "%gs:";
   if (pfx & PFX_SS) return "%ss:";
   return ""; /* no override */
}


/* 'virtual' is an IRExpr* holding a virtual address.  Convert it to a
   linear address by adding any required segment override as indicated
   by sorb, and also dealing with any address size override that is
   present. */
static 
IRExpr* handleAddrOverrides ( const VexAbiInfo* vbi, 
                              Prefix pfx, IRExpr* virtual )
{
   /* --- address size override --- */
   if (haveASO(pfx))
      virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));

   /* Note that the below are hacks that rely on the assumption
      that %fs or %gs are constant.
      Typically, %fs is always 0x63 on linux (in the main thread, it
      stays at value 0), %gs always 0x60 on Darwin, ... */
   /* --- segment overrides --- */
   if (pfx & PFX_FS) {
      if (vbi->guest_amd64_assume_fs_is_const) {
         /* return virtual + guest_FS_CONST. */
         virtual = binop(Iop_Add64, virtual,
                         IRExpr_Get(OFFB_FS_CONST, Ity_I64));
      } else {
         unimplemented("amd64 %fs segment override");
      }
   }

   if (pfx & PFX_GS) {
      if (vbi->guest_amd64_assume_gs_is_const) {
         /* return virtual + guest_GS_CONST. */
         virtual = binop(Iop_Add64, virtual,
                         IRExpr_Get(OFFB_GS_CONST, Ity_I64));
      } else {
         unimplemented("amd64 %gs segment override");
      }
   }

   /* cs, ds, es and ss are simply ignored in 64-bit mode. */
2427 //.. IRType hWordTy;
2428 //.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
2431 //.. /* the common case - no override */
2432 //.. return virtual;
2434 //.. switch (sorb) {
2435 //.. case 0x3E: sreg = R_DS; break;
2436 //.. case 0x26: sreg = R_ES; break;
2437 //.. case 0x64: sreg = R_FS; break;
2438 //.. case 0x65: sreg = R_GS; break;
2439 //.. default: vpanic("handleAddrOverrides(x86,guest)");
2442 //.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
2444 //.. seg_selector = newTemp(Ity_I32);
2445 //.. ldt_ptr = newTemp(hWordTy);
2446 //.. gdt_ptr = newTemp(hWordTy);
2447 //.. r64 = newTemp(Ity_I64);
2449 //.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
2450 //.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
2451 //.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
2454 //.. Call this to do the translation and limit checks:
2455 //.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2456 //.. UInt seg_selector, UInt virtual_addr )
2463 //.. "x86g_use_seg_selector",
2464 //.. &x86g_use_seg_selector,
2465 //.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
2466 //.. mkexpr(seg_selector), virtual)
2470 //.. /* If the high 32 of the result are non-zero, there was a
2471 //.. failure in address translation. In which case, make a
2476 //.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
2478 //.. IRConst_U32( guest_eip_curr_instr )
2482 //.. /* otherwise, here's the translated result. */
2483 //.. return unop(Iop_64to32, mkexpr(r64));

/* Generate IR to calculate an address indicated by a ModRM and
   following SIB bytes.  The expression, and the number of bytes in
   the address mode, are returned (the latter in *len).  Note that
   this fn should not be called if the R/M part of the address denotes
   a register instead of memory.  If print_codegen is true, text of
   the addressing mode is placed in buf.

   The computed address is stored in a new tempreg, and the
   identity of the tempreg is returned.

   extra_bytes holds the number of bytes after the amode, as supplied
   by the caller.  This is needed to make sense of %rip-relative
   addresses.  Note that the value that *len is set to is only the
   length of the amode itself and does not include the value supplied
   in extra_bytes.
*/
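
/* Typical use, as a sketch (identifiers are illustrative only):

      Int    alen;
      HChar  dis_buf[50];
      IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      ...
      delta += alen;   // *len covers only the amode bytes themselves
*/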

static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
{
   IRTemp tmp = newTemp(Ity_I64);
   assign( tmp, addr64 );
   return tmp;
}

static 
IRTemp disAMode ( /*OUT*/Int* len,
                  const VexAbiInfo* vbi, Prefix pfx, Long delta, 
                  /*OUT*/HChar* buf, Int extra_bytes )
{
   UChar mod_reg_rm = getUChar(delta);

   vassert(extra_bytes >= 0 && extra_bytes < 10);

   /* squeeze out the reg field from mod_reg_rm, since a 256-entry
      jump table seems a bit excessive. 
   */
   mod_reg_rm &= 0xC7;                         /* is now XX000YYY */
   mod_reg_rm  = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
                                               /* is now XX0XXYYY */
   mod_reg_rm &= 0x1F;                         /* is now 000XXYYY */
   switch (mod_reg_rm) {

      /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
         REX.B==1: (%r8)  .. (%r15), not including (%r12) or (%r13).
      */
      case 0x00: case 0x01: case 0x02: case 0x03:
      /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
         { UChar rm = toUChar(mod_reg_rm & 7);
           DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
         }

      /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
         REX.B==1: d8(%r8)  ... d8(%r15), not including d8(%r12)
      */
      case 0x08: case 0x09: case 0x0A: case 0x0B:
      /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
         { UChar rm = toUChar(mod_reg_rm & 7);
           Long d   = getSDisp8(delta);
           if (d == 0) {
              DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
           } else {
              DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
           }
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx,
                     binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
         }

      /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
         REX.B==1: d32(%r8)  ... d32(%r15), not including d32(%r12)
      */
      case 0x10: case 0x11: case 0x12: case 0x13:
      /* ! 14 */ case 0x15: case 0x16: case 0x17:
         { UChar rm = toUChar(mod_reg_rm & 7);
           Long  d  = getSDisp32(delta);
           DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
           return disAMode_copy2tmp(
                  handleAddrOverrides(vbi, pfx,
                     binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
         }

      /* REX.B==0: a register, %rax .. %rdi.  This shouldn't happen. */
      /* REX.B==1: a register, %r8  .. %r15.  This shouldn't happen. */
      case 0x18: case 0x19: case 0x1A: case 0x1B:
      case 0x1C: case 0x1D: case 0x1E: case 0x1F:
         vpanic("disAMode(amd64): not an addr!");

      /* RIP + disp32.  This assumes that guest_RIP_curr_instr is set
         correctly at the start of handling each instruction. */
      case 0x05: 
         { Long d = getSDisp32(delta);
           DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
           /* We need to know the next instruction's start address.
              Try and figure out what it is, record the guess, and ask
              the top-level driver logic (bbToIR_AMD64) to check we
              guessed right, after the instruction is completely
              decoded. */
           guest_RIP_next_mustcheck = True;
           guest_RIP_next_assumed   = guest_RIP_bbstart 
                                      + delta+4 + extra_bytes;
           return disAMode_copy2tmp( 
                     handleAddrOverrides(vbi, pfx, 
                        binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
                                         mkU64(d))));
         }

      /* SIB, with no displacement.  Special cases:
         -- %rsp cannot act as an index value.
            If index_r indicates %rsp, zero is used for the index.
         -- when mod is zero and base indicates RBP or R13, base is
            instead a 32-bit sign-extended literal.
         It's all madness, I tell you.  Extract %index, %base and
         scale from the SIB byte.  The value denoted is then:
            | %index == %RSP && (%base == %RBP || %base == %R13)
            = d32 following SIB byte
            | %index == %RSP && !(%base == %RBP || %base == %R13)
            = %base
            | %index != %RSP && (%base == %RBP || %base == %R13)
            = d32 following SIB byte + (%index << scale)
            | %index != %RSP && !(%base == %RBP || %base == %R13)
            = %base + (%index << scale)
      */
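         /* Worked example (illustrative only): for the bytes 8B 04 9E,
            i.e. "movl (%rsi,%rbx,4), %eax" with no REX prefix, the SIB
            byte 0x9E gives scale=2 (so x4), index=3 (%rbx, not %rsp)
            and base=6 (%rsi, not %rbp/%r13), so the address computed
            here is %rsi + (%rbx << 2). */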
2618 UChar sib
= getUChar(delta
);
2619 UChar scale
= toUChar((sib
>> 6) & 3);
2620 UChar index_r
= toUChar((sib
>> 3) & 7);
2621 UChar base_r
= toUChar(sib
& 7);
2622 /* correct since #(R13) == 8 + #(RBP) */
2623 Bool base_is_BPor13
= toBool(base_r
== R_RBP
);
2624 Bool index_is_SP
= toBool(index_r
== R_RSP
&& 0==getRexX(pfx
));
2627 if ((!index_is_SP
) && (!base_is_BPor13
)) {
2629 DIS(buf
, "%s(%s,%s)", segRegTxt(pfx
),
2630 nameIRegRexB(8,pfx
,base_r
),
2631 nameIReg64rexX(pfx
,index_r
));
2633 DIS(buf
, "%s(%s,%s,%d)", segRegTxt(pfx
),
2634 nameIRegRexB(8,pfx
,base_r
),
2635 nameIReg64rexX(pfx
,index_r
), 1<<scale
);
2640 handleAddrOverrides(vbi
, pfx
,
2642 getIRegRexB(8,pfx
,base_r
),
2643 binop(Iop_Shl64
, getIReg64rexX(pfx
,index_r
),
2647 if ((!index_is_SP
) && base_is_BPor13
) {
2648 Long d
= getSDisp32(delta
);
2649 DIS(buf
, "%s%lld(,%s,%d)", segRegTxt(pfx
), d
,
2650 nameIReg64rexX(pfx
,index_r
), 1<<scale
);
2654 handleAddrOverrides(vbi
, pfx
,
2656 binop(Iop_Shl64
, getIReg64rexX(pfx
,index_r
),
2661 if (index_is_SP
&& (!base_is_BPor13
)) {
2662 DIS(buf
, "%s(%s)", segRegTxt(pfx
), nameIRegRexB(8,pfx
,base_r
));
2664 return disAMode_copy2tmp(
2665 handleAddrOverrides(vbi
, pfx
, getIRegRexB(8,pfx
,base_r
)));
2668 if (index_is_SP
&& base_is_BPor13
) {
2669 Long d
= getSDisp32(delta
);
2670 DIS(buf
, "%s%lld", segRegTxt(pfx
), d
);
2672 return disAMode_copy2tmp(
2673 handleAddrOverrides(vbi
, pfx
, mkU64(d
)));

      /* SIB, with 8-bit displacement.  Special cases:
         -- %rsp cannot act as an index value.
            If index_r indicates %rsp, zero is used for the index.
            = d8 + %base + (%index << scale)
      */
2689 UChar sib
= getUChar(delta
);
2690 UChar scale
= toUChar((sib
>> 6) & 3);
2691 UChar index_r
= toUChar((sib
>> 3) & 7);
2692 UChar base_r
= toUChar(sib
& 7);
2693 Long d
= getSDisp8(delta
+1);
2695 if (index_r
== R_RSP
&& 0==getRexX(pfx
)) {
2696 DIS(buf
, "%s%lld(%s)", segRegTxt(pfx
),
2697 d
, nameIRegRexB(8,pfx
,base_r
));
2699 return disAMode_copy2tmp(
2700 handleAddrOverrides(vbi
, pfx
,
2701 binop(Iop_Add64
, getIRegRexB(8,pfx
,base_r
), mkU64(d
)) ));
2704 DIS(buf
, "%s%lld(%s,%s)", segRegTxt(pfx
), d
,
2705 nameIRegRexB(8,pfx
,base_r
),
2706 nameIReg64rexX(pfx
,index_r
));
2708 DIS(buf
, "%s%lld(%s,%s,%d)", segRegTxt(pfx
), d
,
2709 nameIRegRexB(8,pfx
,base_r
),
2710 nameIReg64rexX(pfx
,index_r
), 1<<scale
);
2715 handleAddrOverrides(vbi
, pfx
,
2718 getIRegRexB(8,pfx
,base_r
),
2720 getIReg64rexX(pfx
,index_r
), mkU8(scale
))),
2723 vassert(0); /*NOTREACHED*/

      /* SIB, with 32-bit displacement.  Special cases:
         -- %rsp cannot act as an index value.
            If index_r indicates %rsp, zero is used for the index.
            = d32 + %base + (%index << scale)
      */
2736 UChar sib
= getUChar(delta
);
2737 UChar scale
= toUChar((sib
>> 6) & 3);
2738 UChar index_r
= toUChar((sib
>> 3) & 7);
2739 UChar base_r
= toUChar(sib
& 7);
2740 Long d
= getSDisp32(delta
+1);
2742 if (index_r
== R_RSP
&& 0==getRexX(pfx
)) {
2743 DIS(buf
, "%s%lld(%s)", segRegTxt(pfx
),
2744 d
, nameIRegRexB(8,pfx
,base_r
));
2746 return disAMode_copy2tmp(
2747 handleAddrOverrides(vbi
, pfx
,
2748 binop(Iop_Add64
, getIRegRexB(8,pfx
,base_r
), mkU64(d
)) ));
2751 DIS(buf
, "%s%lld(%s,%s)", segRegTxt(pfx
), d
,
2752 nameIRegRexB(8,pfx
,base_r
),
2753 nameIReg64rexX(pfx
,index_r
));
2755 DIS(buf
, "%s%lld(%s,%s,%d)", segRegTxt(pfx
), d
,
2756 nameIRegRexB(8,pfx
,base_r
),
2757 nameIReg64rexX(pfx
,index_r
), 1<<scale
);
2762 handleAddrOverrides(vbi
, pfx
,
2765 getIRegRexB(8,pfx
,base_r
),
2767 getIReg64rexX(pfx
,index_r
), mkU8(scale
))),
2770 vassert(0); /*NOTREACHED*/
2774 vpanic("disAMode(amd64)");
2775 return 0; /*notreached*/
2780 /* Similarly for VSIB addressing. This returns just the addend,
2781 and fills in *rI and *vscale with the register number of the vector
2782 index and its multiplicand. */
2784 IRTemp
disAVSIBMode ( /*OUT*/Int
* len
,
2785 const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
2786 /*OUT*/HChar
* buf
, /*OUT*/UInt
* rI
,
2787 IRType ty
, /*OUT*/Int
* vscale
)
2789 UChar mod_reg_rm
= getUChar(delta
);
2790 const HChar
*vindex
;
2796 if ((mod_reg_rm
& 7) != 4 || epartIsReg(mod_reg_rm
))
2797 return IRTemp_INVALID
;
2799 UChar sib
= getUChar(delta
+1);
2800 UChar scale
= toUChar((sib
>> 6) & 3);
2801 UChar index_r
= toUChar((sib
>> 3) & 7);
2802 UChar base_r
= toUChar(sib
& 7);
2804 /* correct since #(R13) == 8 + #(RBP) */
2805 Bool base_is_BPor13
= toBool(base_r
== R_RBP
);
2809 *rI
= index_r
| (getRexX(pfx
) << 3);
2811 vindex
= nameXMMReg(*rI
);
2813 vindex
= nameYMMReg(*rI
);
2816 switch (mod_reg_rm
>> 6) {
2818 if (base_is_BPor13
) {
2819 d
= getSDisp32(delta
);
2822 DIS(buf
, "%s%lld(,%s)", segRegTxt(pfx
), d
, vindex
);
2824 DIS(buf
, "%s%lld(,%s,%d)", segRegTxt(pfx
), d
, vindex
, 1<<scale
);
2826 return disAMode_copy2tmp( mkU64(d
) );
2829 DIS(buf
, "%s(%s,%s)", segRegTxt(pfx
),
2830 nameIRegRexB(8,pfx
,base_r
), vindex
);
2832 DIS(buf
, "%s(%s,%s,%d)", segRegTxt(pfx
),
2833 nameIRegRexB(8,pfx
,base_r
), vindex
, 1<<scale
);
2838 d
= getSDisp8(delta
);
2842 d
= getSDisp32(delta
);
2846 DIS(buf
, "%s%lld(%s,%s)", segRegTxt(pfx
), d
,
2847 nameIRegRexB(8,pfx
,base_r
), vindex
);
2849 DIS(buf
, "%s%lld(%s,%s,%d)", segRegTxt(pfx
), d
,
2850 nameIRegRexB(8,pfx
,base_r
), vindex
, 1<<scale
);
2856 return disAMode_copy2tmp( getIRegRexB(8,pfx
,base_r
) );
2857 return disAMode_copy2tmp( binop(Iop_Add64
, getIRegRexB(8,pfx
,base_r
),
2862 /* Figure out the number of (insn-stream) bytes constituting the amode
2863 beginning at delta. Is useful for getting hold of literals beyond
2864 the end of the amode before it has been disassembled. */
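
/* For example (an illustrative sketch only): when decoding an insn of
   the form "81 /0 imm32" (32-bit ADD of an immediate to reg-or-mem),
   the immediate lives after the amode, so the decoder can fetch it
   with something like

      Long imm = getSDisp32( delta + lengthAMode(pfx, delta) );

   before the amode itself has been turned into IR. */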
2866 static UInt
lengthAMode ( Prefix pfx
, Long delta
)
2868 UChar mod_reg_rm
= getUChar(delta
);
2871 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2872 jump table seems a bit excessive.
2874 mod_reg_rm
&= 0xC7; /* is now XX000YYY */
2875 mod_reg_rm
= toUChar(mod_reg_rm
| (mod_reg_rm
>> 3));
2876 /* is now XX0XXYYY */
2877 mod_reg_rm
&= 0x1F; /* is now 000XXYYY */
2878 switch (mod_reg_rm
) {
2880 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2881 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2883 case 0x00: case 0x01: case 0x02: case 0x03:
2884 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2887 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2888 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2890 case 0x08: case 0x09: case 0x0A: case 0x0B:
2891 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2894 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2895 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2897 case 0x10: case 0x11: case 0x12: case 0x13:
2898 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2901 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2902 /* REX.B==1: a register, %r8 .. %r16. This shouldn't happen. */
2903 /* Not an address, but still handled. */
2904 case 0x18: case 0x19: case 0x1A: case 0x1B:
2905 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2913 /* SIB, with no displacement. */
2914 UChar sib
= getUChar(delta
);
2915 UChar base_r
= toUChar(sib
& 7);
2916 /* correct since #(R13) == 8 + #(RBP) */
2917 Bool base_is_BPor13
= toBool(base_r
== R_RBP
);
2919 if (base_is_BPor13
) {
2926 /* SIB, with 8-bit displacement. */
2930 /* SIB, with 32-bit displacement. */
2935 vpanic("lengthAMode(amd64)");
2936 return 0; /*notreached*/
2941 /*------------------------------------------------------------*/
2942 /*--- Disassembling common idioms ---*/
2943 /*------------------------------------------------------------*/
enum { WithFlagNone=2, WithFlagCarry, WithFlagCarryX, WithFlagOverX }

/* Handle binary integer instructions of the form
      op E, G  meaning
      op reg-or-mem, reg
   Is passed a ptr to the modRM byte, the actual operation, and the
   data size.  Returns the address advanced completely over this
   instruction.

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %G, tmp

   If E is mem and OP is not reversible,
                -->    (getAddr E) -> tmpa

   If E is mem and OP is reversible
                -->    (getAddr E) -> tmpa
*/
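
/* Concrete flavour of the above, as an illustrative sketch only (the
   temp names are made up): for "addl (%rbx), %ecx" the generated IR is
   roughly

      t_addr = <address denoted by (%rbx)>
      t_src  = LDle:I32(t_addr)
      t_dst0 = GET:I32(<offset of %ecx>)
      t_dst1 = Add32(t_dst0, t_src)
      <flags thunk set from t_dst0 and t_src>
      PUT(<offset of %ecx>) = t_dst1
*/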
2977 ULong
dis_op2_E_G ( const VexAbiInfo
* vbi
,
2984 const HChar
* t_amd64opc
)
2988 IRType ty
= szToITy(size
);
2989 IRTemp dst1
= newTemp(ty
);
2990 IRTemp src
= newTemp(ty
);
2991 IRTemp dst0
= newTemp(ty
);
2992 UChar rm
= getUChar(delta0
);
2993 IRTemp addr
= IRTemp_INVALID
;
2995 /* Stay sane -- check for valid (op8, flag, keep) combinations. */
2999 case WithFlagNone
: case WithFlagCarry
:
3000 case WithFlagCarryX
: case WithFlagOverX
:
3008 vassert(flag
== WithFlagNone
|| flag
== WithFlagCarry
);
3009 if (flag
== WithFlagCarry
) vassert(keep
);
3012 vassert(flag
== WithFlagNone
);
3014 case Iop_Or8
: case Iop_Xor8
:
3015 vassert(flag
== WithFlagNone
);
3022 if (epartIsReg(rm
)) {
3023 /* Specially handle XOR reg,reg, because that doesn't really
3024 depend on reg, and doing the obvious thing potentially
3025 generates a spurious value check failure due to the bogus
3026 dependency. Ditto SUB/SBB reg,reg. */
3027 if ((op8
== Iop_Xor8
|| ((op8
== Iop_Sub8
) && keep
))
3028 && offsetIRegG(size
,pfx
,rm
) == offsetIRegE(size
,pfx
,rm
)) {
3029 putIRegG(size
,pfx
,rm
, mkU(ty
,0));
3032 assign( dst0
, getIRegG(size
,pfx
,rm
) );
3033 assign( src
, getIRegE(size
,pfx
,rm
) );
3035 if (op8
== Iop_Add8
&& flag
== WithFlagCarry
) {
3036 helper_ADC( size
, dst1
, dst0
, src
,
3037 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3038 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3040 if (op8
== Iop_Sub8
&& flag
== WithFlagCarry
) {
3041 helper_SBB( size
, dst1
, dst0
, src
,
3042 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3043 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3045 if (op8
== Iop_Add8
&& flag
== WithFlagCarryX
) {
3046 helper_ADCX_ADOX( True
/*isADCX*/, size
, dst1
, dst0
, src
);
3047 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3049 if (op8
== Iop_Add8
&& flag
== WithFlagOverX
) {
3050 helper_ADCX_ADOX( False
/*!isADCX*/, size
, dst1
, dst0
, src
);
3051 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3053 assign( dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
3055 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3057 setFlags_DEP1(op8
, dst1
, ty
);
3059 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3062 DIP("%s%c %s,%s\n", t_amd64opc
, nameISize(size
),
3063 nameIRegE(size
,pfx
,rm
),
3064 nameIRegG(size
,pfx
,rm
));
3067 /* E refers to memory */
3068 addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
3069 assign( dst0
, getIRegG(size
,pfx
,rm
) );
3070 assign( src
, loadLE(szToITy(size
), mkexpr(addr
)) );
3072 if (op8
== Iop_Add8
&& flag
== WithFlagCarry
) {
3073 helper_ADC( size
, dst1
, dst0
, src
,
3074 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3075 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3077 if (op8
== Iop_Sub8
&& flag
== WithFlagCarry
) {
3078 helper_SBB( size
, dst1
, dst0
, src
,
3079 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3080 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3082 if (op8
== Iop_Add8
&& flag
== WithFlagCarryX
) {
3083 helper_ADCX_ADOX( True
/*isADCX*/, size
, dst1
, dst0
, src
);
3084 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3086 if (op8
== Iop_Add8
&& flag
== WithFlagOverX
) {
3087 helper_ADCX_ADOX( False
/*!isADCX*/, size
, dst1
, dst0
, src
);
3088 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3090 assign( dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
3092 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3094 setFlags_DEP1(op8
, dst1
, ty
);
3096 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3099 DIP("%s%c %s,%s\n", t_amd64opc
, nameISize(size
),
3100 dis_buf
, nameIRegG(size
, pfx
, rm
));

/* Handle binary integer instructions of the form
      op G, E  meaning
      op reg, reg-or-mem
   Is passed a ptr to the modRM byte, the actual operation, and the
   data size.  Returns the address advanced completely over this
   instruction.

   G(src) is reg.
   E(dst) is reg-or-mem

   If E is reg, -->    GET %E, tmp

   If E is mem, -->    (getAddr E) -> tmpa
*/
3127 ULong
dis_op2_G_E ( const VexAbiInfo
* vbi
,
3134 const HChar
* t_amd64opc
)
3138 IRType ty
= szToITy(size
);
3139 IRTemp dst1
= newTemp(ty
);
3140 IRTemp src
= newTemp(ty
);
3141 IRTemp dst0
= newTemp(ty
);
3142 UChar rm
= getUChar(delta0
);
3143 IRTemp addr
= IRTemp_INVALID
;
3145 /* Stay sane -- check for valid (op8, flag, keep) combinations. */
3148 vassert(flag
== WithFlagNone
|| flag
== WithFlagCarry
);
3152 vassert(flag
== WithFlagNone
|| flag
== WithFlagCarry
);
3153 if (flag
== WithFlagCarry
) vassert(keep
);
3155 case Iop_And8
: case Iop_Or8
: case Iop_Xor8
:
3156 vassert(flag
== WithFlagNone
);
3163 /* flag != WithFlagNone is only allowed for Add and Sub and indicates the
3164 intended operation is add-with-carry or subtract-with-borrow. */
3166 if (epartIsReg(rm
)) {
3167 /* Specially handle XOR reg,reg, because that doesn't really
3168 depend on reg, and doing the obvious thing potentially
3169 generates a spurious value check failure due to the bogus
3170 dependency. Ditto SUB/SBB reg,reg. */
3171 if ((op8
== Iop_Xor8
|| ((op8
== Iop_Sub8
) && keep
))
3172 && offsetIRegG(size
,pfx
,rm
) == offsetIRegE(size
,pfx
,rm
)) {
3173 putIRegE(size
,pfx
,rm
, mkU(ty
,0));
3176 assign(dst0
, getIRegE(size
,pfx
,rm
));
3177 assign(src
, getIRegG(size
,pfx
,rm
));
3179 if (op8
== Iop_Add8
&& flag
== WithFlagCarry
) {
3180 helper_ADC( size
, dst1
, dst0
, src
,
3181 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3182 putIRegE(size
, pfx
, rm
, mkexpr(dst1
));
3184 if (op8
== Iop_Sub8
&& flag
== WithFlagCarry
) {
3185 helper_SBB( size
, dst1
, dst0
, src
,
3186 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3187 putIRegE(size
, pfx
, rm
, mkexpr(dst1
));
3189 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
3191 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3193 setFlags_DEP1(op8
, dst1
, ty
);
3195 putIRegE(size
, pfx
, rm
, mkexpr(dst1
));
3198 DIP("%s%c %s,%s\n", t_amd64opc
, nameISize(size
),
3199 nameIRegG(size
,pfx
,rm
),
3200 nameIRegE(size
,pfx
,rm
));
3204 /* E refers to memory */
3206 addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
3207 assign(dst0
, loadLE(ty
,mkexpr(addr
)));
3208 assign(src
, getIRegG(size
,pfx
,rm
));
3210 if (op8
== Iop_Add8
&& flag
== WithFlagCarry
) {
3211 if (haveLOCK(pfx
)) {
3212 /* cas-style store */
3213 helper_ADC( size
, dst1
, dst0
, src
,
3214 /*store*/addr
, dst0
/*expVal*/, guest_RIP_curr_instr
);
3217 helper_ADC( size
, dst1
, dst0
, src
,
3218 /*store*/addr
, IRTemp_INVALID
, 0 );
3221 if (op8
== Iop_Sub8
&& flag
== WithFlagCarry
) {
3222 if (haveLOCK(pfx
)) {
3223 /* cas-style store */
3224 helper_SBB( size
, dst1
, dst0
, src
,
3225 /*store*/addr
, dst0
/*expVal*/, guest_RIP_curr_instr
);
3228 helper_SBB( size
, dst1
, dst0
, src
,
3229 /*store*/addr
, IRTemp_INVALID
, 0 );
3232 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
3234 if (haveLOCK(pfx
)) {
3235 if (0) vex_printf("locked case\n" );
3236 casLE( mkexpr(addr
),
3237 mkexpr(dst0
)/*expval*/,
3238 mkexpr(dst1
)/*newval*/, guest_RIP_curr_instr
);
3240 if (0) vex_printf("nonlocked case\n");
3241 storeLE(mkexpr(addr
), mkexpr(dst1
));
3245 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3247 setFlags_DEP1(op8
, dst1
, ty
);
3250 DIP("%s%c %s,%s\n", t_amd64opc
, nameISize(size
),
3251 nameIRegG(size
,pfx
,rm
), dis_buf
);

/* Handle move instructions of the form
      mov E, G  meaning
      mov reg-or-mem, reg
   Is passed a ptr to the modRM byte, and the data size.  Returns
   the address advanced completely over this instruction.

   E(src) is reg-or-mem
   G(dst) is reg.

   If E is reg, -->    GET %E, tmpv

   If E is mem  -->    (getAddr E) -> tmpa
*/
3274 ULong
dis_mov_E_G ( const VexAbiInfo
* vbi
,
3280 UChar rm
= getUChar(delta0
);
3283 if (epartIsReg(rm
)) {
3284 putIRegG(size
, pfx
, rm
, getIRegE(size
, pfx
, rm
));
3285 DIP("mov%c %s,%s\n", nameISize(size
),
3286 nameIRegE(size
,pfx
,rm
),
3287 nameIRegG(size
,pfx
,rm
));
3291 /* E refers to memory */
3293 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
3294 putIRegG(size
, pfx
, rm
, loadLE(szToITy(size
), mkexpr(addr
)));
3295 DIP("mov%c %s,%s\n", nameISize(size
),
3297 nameIRegG(size
,pfx
,rm
));

/* Handle move instructions of the form
      mov G, E  meaning
      mov reg, reg-or-mem
   Is passed a ptr to the modRM byte, and the data size.  Returns
   the address advanced completely over this instruction.
   We have to decide here whether F2 or F3 are acceptable.  F2 never is.

   G(src) is reg.
   E(dst) is reg-or-mem

   If E is reg, -->    GET %G, tmp

   If E is mem, -->    (getAddr E) -> tmpa
*/
3321 ULong
dis_mov_G_E ( const VexAbiInfo
* vbi
,
3328 UChar rm
= getUChar(delta0
);
3333 if (epartIsReg(rm
)) {
3334 if (haveF2orF3(pfx
)) { *ok
= False
; return delta0
; }
3335 putIRegE(size
, pfx
, rm
, getIRegG(size
, pfx
, rm
));
3336 DIP("mov%c %s,%s\n", nameISize(size
),
3337 nameIRegG(size
,pfx
,rm
),
3338 nameIRegE(size
,pfx
,rm
));
3342 /* E refers to memory */
3344 if (haveF2(pfx
)) { *ok
= False
; return delta0
; }
3345 /* F3(XRELEASE) is acceptable, though. */
3346 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
3347 storeLE( mkexpr(addr
), getIRegG(size
, pfx
, rm
) );
3348 DIP("mov%c %s,%s\n", nameISize(size
),
3349 nameIRegG(size
,pfx
,rm
),
3356 /* op $immediate, AL/AX/EAX/RAX. */
3358 ULong
dis_op_imm_A ( Int size
,
3363 const HChar
* t_amd64opc
)
3365 Int size4
= imin(size
,4);
3366 IRType ty
= szToITy(size
);
3367 IRTemp dst0
= newTemp(ty
);
3368 IRTemp src
= newTemp(ty
);
3369 IRTemp dst1
= newTemp(ty
);
3370 Long lit
= getSDisp(size4
,delta
);
3371 assign(dst0
, getIRegRAX(size
));
3372 assign(src
, mkU(ty
,lit
& mkSizeMask(size
)));
3374 if (isAddSub(op8
) && !carrying
) {
3375 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
3376 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3381 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
3382 setFlags_DEP1(op8
, dst1
, ty
);
3385 if (op8
== Iop_Add8
&& carrying
) {
3386 helper_ADC( size
, dst1
, dst0
, src
,
3387 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3390 if (op8
== Iop_Sub8
&& carrying
) {
3391 helper_SBB( size
, dst1
, dst0
, src
,
3392 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3395 vpanic("dis_op_imm_A(amd64,guest)");
3398 putIRegRAX(size
, mkexpr(dst1
));
3400 DIP("%s%c $%lld, %s\n", t_amd64opc
, nameISize(size
),
3401 lit
, nameIRegRAX(size
));
3406 /* Sign- and Zero-extending moves. */
3408 ULong
dis_movx_E_G ( const VexAbiInfo
* vbi
,
3410 Long delta
, Int szs
, Int szd
, Bool sign_extend
)
3412 UChar rm
= getUChar(delta
);
3413 if (epartIsReg(rm
)) {
3414 putIRegG(szd
, pfx
, rm
,
3416 szs
,szd
,sign_extend
,
3417 getIRegE(szs
,pfx
,rm
)));
3418 DIP("mov%c%c%c %s,%s\n", sign_extend
? 's' : 'z',
3421 nameIRegE(szs
,pfx
,rm
),
3422 nameIRegG(szd
,pfx
,rm
));
3426 /* E refers to memory */
3430 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
3431 putIRegG(szd
, pfx
, rm
,
3433 szs
,szd
,sign_extend
,
3434 loadLE(szToITy(szs
),mkexpr(addr
))));
3435 DIP("mov%c%c%c %s,%s\n", sign_extend
? 's' : 'z',
3439 nameIRegG(szd
,pfx
,rm
));
3445 /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
3446 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */
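
/* For instance (illustrative): for an unsigned 64-bit "divq %rbx",
   t holds the value of %rbx and the sz==8 case below computes, via
   Iop_DivModU128to64,

      %rax = (RDX:RAX) / %rbx       (quotient)
      %rdx = (RDX:RAX) % %rbx       (remainder)
*/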
3448 void codegen_div ( Int sz
, IRTemp t
, Bool signed_divide
)
3450 /* special-case the 64-bit case */
3452 IROp op
= signed_divide
? Iop_DivModS128to64
3453 : Iop_DivModU128to64
;
3454 IRTemp src128
= newTemp(Ity_I128
);
3455 IRTemp dst128
= newTemp(Ity_I128
);
3456 assign( src128
, binop(Iop_64HLto128
,
3458 getIReg64(R_RAX
)) );
3459 assign( dst128
, binop(op
, mkexpr(src128
), mkexpr(t
)) );
3460 putIReg64( R_RAX
, unop(Iop_128to64
,mkexpr(dst128
)) );
3461 putIReg64( R_RDX
, unop(Iop_128HIto64
,mkexpr(dst128
)) );
3463 IROp op
= signed_divide
? Iop_DivModS64to32
3464 : Iop_DivModU64to32
;
3465 IRTemp src64
= newTemp(Ity_I64
);
3466 IRTemp dst64
= newTemp(Ity_I64
);
3470 binop(Iop_32HLto64
, getIRegRDX(4), getIRegRAX(4)) );
3472 binop(op
, mkexpr(src64
), mkexpr(t
)) );
3473 putIRegRAX( 4, unop(Iop_64to32
,mkexpr(dst64
)) );
3474 putIRegRDX( 4, unop(Iop_64HIto32
,mkexpr(dst64
)) );
3477 IROp widen3264
= signed_divide
? Iop_32Sto64
: Iop_32Uto64
;
3478 IROp widen1632
= signed_divide
? Iop_16Sto32
: Iop_16Uto32
;
3479 assign( src64
, unop(widen3264
,
3483 assign( dst64
, binop(op
, mkexpr(src64
), unop(widen1632
,mkexpr(t
))) );
3484 putIRegRAX( 2, unop(Iop_32to16
,unop(Iop_64to32
,mkexpr(dst64
))) );
3485 putIRegRDX( 2, unop(Iop_32to16
,unop(Iop_64HIto32
,mkexpr(dst64
))) );
3489 IROp widen3264
= signed_divide
? Iop_32Sto64
: Iop_32Uto64
;
3490 IROp widen1632
= signed_divide
? Iop_16Sto32
: Iop_16Uto32
;
3491 IROp widen816
= signed_divide
? Iop_8Sto16
: Iop_8Uto16
;
3492 assign( src64
, unop(widen3264
,
3493 unop(widen1632
, getIRegRAX(2))) );
3495 binop(op
, mkexpr(src64
),
3496 unop(widen1632
, unop(widen816
, mkexpr(t
)))) );
3497 putIRegRAX( 1, unop(Iop_16to8
,
3499 unop(Iop_64to32
,mkexpr(dst64
)))) );
3500 putIRegAH( unop(Iop_16to8
,
3502 unop(Iop_64HIto32
,mkexpr(dst64
)))) );
3506 vpanic("codegen_div(amd64)");
3512 ULong
dis_Grp1 ( const VexAbiInfo
* vbi
,
3514 Long delta
, UChar modrm
,
3515 Int am_sz
, Int d_sz
, Int sz
, Long d64
)
3519 IRType ty
= szToITy(sz
);
3520 IRTemp dst1
= newTemp(ty
);
3521 IRTemp src
= newTemp(ty
);
3522 IRTemp dst0
= newTemp(ty
);
3523 IRTemp addr
= IRTemp_INVALID
;
3524 IROp op8
= Iop_INVALID
;
3525 ULong mask
= mkSizeMask(sz
);
3527 switch (gregLO3ofRM(modrm
)) {
3528 case 0: op8
= Iop_Add8
; break; case 1: op8
= Iop_Or8
; break;
3529 case 2: break; // ADC
3530 case 3: break; // SBB
3531 case 4: op8
= Iop_And8
; break; case 5: op8
= Iop_Sub8
; break;
3532 case 6: op8
= Iop_Xor8
; break; case 7: op8
= Iop_Sub8
; break;
3534 default: vpanic("dis_Grp1(amd64): unhandled case");
3537 if (epartIsReg(modrm
)) {
3538 vassert(am_sz
== 1);
3540 assign(dst0
, getIRegE(sz
,pfx
,modrm
));
3541 assign(src
, mkU(ty
,d64
& mask
));
3543 if (gregLO3ofRM(modrm
) == 2 /* ADC */) {
3544 helper_ADC( sz
, dst1
, dst0
, src
,
3545 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3547 if (gregLO3ofRM(modrm
) == 3 /* SBB */) {
3548 helper_SBB( sz
, dst1
, dst0
, src
,
3549 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3551 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
3553 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3555 setFlags_DEP1(op8
, dst1
, ty
);
3558 if (gregLO3ofRM(modrm
) < 7)
3559 putIRegE(sz
, pfx
, modrm
, mkexpr(dst1
));
3561 delta
+= (am_sz
+ d_sz
);
3562 DIP("%s%c $%lld, %s\n",
3563 nameGrp1(gregLO3ofRM(modrm
)), nameISize(sz
), d64
,
3564 nameIRegE(sz
,pfx
,modrm
));
3566 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, /*xtra*/d_sz
);
3568 assign(dst0
, loadLE(ty
,mkexpr(addr
)));
3569 assign(src
, mkU(ty
,d64
& mask
));
3571 if (gregLO3ofRM(modrm
) == 2 /* ADC */) {
3572 if (haveLOCK(pfx
)) {
3573 /* cas-style store */
3574 helper_ADC( sz
, dst1
, dst0
, src
,
3575 /*store*/addr
, dst0
/*expVal*/, guest_RIP_curr_instr
);
3578 helper_ADC( sz
, dst1
, dst0
, src
,
3579 /*store*/addr
, IRTemp_INVALID
, 0 );
3582 if (gregLO3ofRM(modrm
) == 3 /* SBB */) {
3583 if (haveLOCK(pfx
)) {
3584 /* cas-style store */
3585 helper_SBB( sz
, dst1
, dst0
, src
,
3586 /*store*/addr
, dst0
/*expVal*/, guest_RIP_curr_instr
);
3589 helper_SBB( sz
, dst1
, dst0
, src
,
3590 /*store*/addr
, IRTemp_INVALID
, 0 );
3593 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
3594 if (gregLO3ofRM(modrm
) < 7) {
3595 if (haveLOCK(pfx
)) {
3596 casLE( mkexpr(addr
), mkexpr(dst0
)/*expVal*/,
3597 mkexpr(dst1
)/*newVal*/,
3598 guest_RIP_curr_instr
);
3600 storeLE(mkexpr(addr
), mkexpr(dst1
));
3604 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3606 setFlags_DEP1(op8
, dst1
, ty
);
3609 delta
+= (len
+d_sz
);
3610 DIP("%s%c $%lld, %s\n",
3611 nameGrp1(gregLO3ofRM(modrm
)), nameISize(sz
),
3618 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed
3622 ULong
dis_Grp2 ( const VexAbiInfo
* vbi
,
3624 Long delta
, UChar modrm
,
3625 Int am_sz
, Int d_sz
, Int sz
, IRExpr
* shift_expr
,
3626 const HChar
* shift_expr_txt
, Bool
* decode_OK
)
3628 /* delta on entry points at the modrm byte. */
3631 Bool isShift
, isRotate
, isRotateC
;
3632 IRType ty
= szToITy(sz
);
3633 IRTemp dst0
= newTemp(ty
);
3634 IRTemp dst1
= newTemp(ty
);
3635 IRTemp addr
= IRTemp_INVALID
;
3639 vassert(sz
== 1 || sz
== 2 || sz
== 4 || sz
== 8);
3641 /* Put value to shift/rotate in dst0. */
3642 if (epartIsReg(modrm
)) {
3643 assign(dst0
, getIRegE(sz
, pfx
, modrm
));
3644 delta
+= (am_sz
+ d_sz
);
3646 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, /*xtra*/d_sz
);
3647 assign(dst0
, loadLE(ty
,mkexpr(addr
)));
3648 delta
+= len
+ d_sz
;
3652 switch (gregLO3ofRM(modrm
)) { case 4: case 5: case 6: case 7: isShift
= True
; }
3655 switch (gregLO3ofRM(modrm
)) { case 0: case 1: isRotate
= True
; }
3658 switch (gregLO3ofRM(modrm
)) { case 2: case 3: isRotateC
= True
; }
3660 if (!isShift
&& !isRotate
&& !isRotateC
) {
3662 vpanic("dis_Grp2(Reg): unhandled case(amd64)");
      /* Call a helper; this insn is so ridiculous it does not deserve
         better.  One problem is, the helper has to calculate both the
         new value and the new flags.  This is more than 64 bits, and
         there is no way to return more than 64 bits from the helper.
         Hence the crude and obvious solution is to call it twice,
         using the sign of the sz field to indicate whether it is the
         value or rflags result we want.
      */
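      /* Sketch of the resulting call pair (illustrative only): the same
         helper is applied twice, once with the operand size and once
         with the size negated, one call producing the rotated value and
         the other the new rflags, conceptually

            new_value  = amd64g_calculate_RCL(val, amt, rflags,  size)
            new_rflags = amd64g_calculate_RCL(val, amt, rflags, -size)

         (or _RCR for rotates through carry to the right). */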
3674 Bool left
= toBool(gregLO3ofRM(modrm
) == 2);
3676 IRExpr
** argsRFLAGS
;
3678 IRTemp new_value
= newTemp(Ity_I64
);
3679 IRTemp new_rflags
= newTemp(Ity_I64
);
3680 IRTemp old_rflags
= newTemp(Ity_I64
);
3682 assign( old_rflags
, widenUto64(mk_amd64g_calculate_rflags_all()) );
3685 = mkIRExprVec_4( widenUto64(mkexpr(dst0
)), /* thing to rotate */
3686 widenUto64(shift_expr
), /* rotate amount */
3693 left
? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3694 left
? &amd64g_calculate_RCL
: &amd64g_calculate_RCR
,
3700 = mkIRExprVec_4( widenUto64(mkexpr(dst0
)), /* thing to rotate */
3701 widenUto64(shift_expr
), /* rotate amount */
3708 left
? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3709 left
? &amd64g_calculate_RCL
: &amd64g_calculate_RCR
,
3714 assign( dst1
, narrowTo(ty
, mkexpr(new_value
)) );
3715 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
3716 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(new_rflags
) ));
3717 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
3718 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
3724 IRTemp pre64
= newTemp(Ity_I64
);
3725 IRTemp res64
= newTemp(Ity_I64
);
3726 IRTemp res64ss
= newTemp(Ity_I64
);
3727 IRTemp shift_amt
= newTemp(Ity_I8
);
3728 UChar mask
= toUChar(sz
==8 ? 63 : 31);
3731 switch (gregLO3ofRM(modrm
)) {
3732 case 4: op64
= Iop_Shl64
; break;
3733 case 5: op64
= Iop_Shr64
; break;
3734 case 6: op64
= Iop_Shl64
; break;
3735 case 7: op64
= Iop_Sar64
; break;
3737 default: vpanic("dis_Grp2:shift"); break;
      /* Widen the value to be shifted to 64 bits, do the shift, and
         narrow back down.  This seems surprisingly long-winded, but
         unfortunately the AMD semantics requires that 8/16/32-bit
         shifts give defined results for shift values all the way up
         to 32, and this seems the simplest way to do it.  It has the
         advantage that the only IR level shifts generated are of 64
         bit values, and the shift amount is guaranteed to be in the
         range 0 .. 63, thereby observing the IR semantics requiring
         all shift values to be in the range 0 .. 2^word_size-1.

         Therefore the shift amount is masked with 63 for 64-bit shifts
         and 31 for all others.
      */
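      /* The same widen-shift-narrow idea in plain C, as an illustrative
         sketch only (not part of the decoder):

            static UChar shl8_defined ( UChar val, UChar amt )
            {
               ULong wide = (ULong)val;   // widen to 64 bits
               wide <<= (amt & 31);       // shift with the already-masked amount
               return (UChar)wide;        // narrow back down
            }

         so even an 8-bit operand shifted by, say, 17 yields the
         architecturally defined result (zero) rather than relying on an
         IR shift whose amount exceeds the operand width. */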
3753 /* shift_amt = shift_expr & MASK, regardless of operation size */
3754 assign( shift_amt
, binop(Iop_And8
, shift_expr
, mkU8(mask
)) );
3756 /* suitably widen the value to be shifted to 64 bits. */
3757 assign( pre64
, op64
==Iop_Sar64
? widenSto64(mkexpr(dst0
))
3758 : widenUto64(mkexpr(dst0
)) );
3760 /* res64 = pre64 `shift` shift_amt */
3761 assign( res64
, binop(op64
, mkexpr(pre64
), mkexpr(shift_amt
)) );
3763 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
3769 mkexpr(shift_amt
), mkU8(1)),
3772 /* Build the flags thunk. */
3773 setFlags_DEP1_DEP2_shift(op64
, res64
, res64ss
, ty
, shift_amt
);
3775 /* Narrow the result back down. */
3776 assign( dst1
, narrowTo(ty
, mkexpr(res64
)) );
3778 } /* if (isShift) */
3782 Int ccOp
= ty
==Ity_I8
? 0 : (ty
==Ity_I16
? 1
3783 : (ty
==Ity_I32
? 2 : 3));
3784 Bool left
= toBool(gregLO3ofRM(modrm
) == 0);
3785 IRTemp rot_amt
= newTemp(Ity_I8
);
3786 IRTemp rot_amt64
= newTemp(Ity_I8
);
3787 IRTemp oldFlags
= newTemp(Ity_I64
);
3788 UChar mask
= toUChar(sz
==8 ? 63 : 31);
3790 /* rot_amt = shift_expr & mask */
3791 /* By masking the rotate amount thusly, the IR-level Shl/Shr
3792 expressions never shift beyond the word size and thus remain
3794 assign(rot_amt64
, binop(Iop_And8
, shift_expr
, mkU8(mask
)));
3797 assign(rot_amt
, mkexpr(rot_amt64
));
3799 assign(rot_amt
, binop(Iop_And8
, mkexpr(rot_amt64
), mkU8(8*sz
-1)));
3803 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
3805 binop( mkSizedOp(ty
,Iop_Or8
),
3806 binop( mkSizedOp(ty
,Iop_Shl8
),
3810 binop( mkSizedOp(ty
,Iop_Shr8
),
3812 binop(Iop_Sub8
,mkU8(8*sz
), mkexpr(rot_amt
))
3816 ccOp
+= AMD64G_CC_OP_ROLB
;
3818 } else { /* right */
3820 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
3822 binop( mkSizedOp(ty
,Iop_Or8
),
3823 binop( mkSizedOp(ty
,Iop_Shr8
),
3827 binop( mkSizedOp(ty
,Iop_Shl8
),
3829 binop(Iop_Sub8
,mkU8(8*sz
), mkexpr(rot_amt
))
3833 ccOp
+= AMD64G_CC_OP_RORB
;
3837 /* dst1 now holds the rotated value. Build flag thunk. We
3838 need the resulting value for this, and the previous flags.
3839 Except don't set it if the rotate count is zero. */
3841 assign(oldFlags
, mk_amd64g_calculate_rflags_all());
3843 /* rot_amt64 :: Ity_I8. We need to convert it to I1. */
3844 IRTemp rot_amt64b
= newTemp(Ity_I1
);
3845 assign(rot_amt64b
, binop(Iop_CmpNE8
, mkexpr(rot_amt64
), mkU8(0)) );
3847 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
3848 stmt( IRStmt_Put( OFFB_CC_OP
,
3849 IRExpr_ITE( mkexpr(rot_amt64b
),
3851 IRExpr_Get(OFFB_CC_OP
,Ity_I64
) ) ));
3852 stmt( IRStmt_Put( OFFB_CC_DEP1
,
3853 IRExpr_ITE( mkexpr(rot_amt64b
),
3854 widenUto64(mkexpr(dst1
)),
3855 IRExpr_Get(OFFB_CC_DEP1
,Ity_I64
) ) ));
3856 stmt( IRStmt_Put( OFFB_CC_DEP2
,
3857 IRExpr_ITE( mkexpr(rot_amt64b
),
3859 IRExpr_Get(OFFB_CC_DEP2
,Ity_I64
) ) ));
3860 stmt( IRStmt_Put( OFFB_CC_NDEP
,
3861 IRExpr_ITE( mkexpr(rot_amt64b
),
3863 IRExpr_Get(OFFB_CC_NDEP
,Ity_I64
) ) ));
3864 } /* if (isRotate) */
3866 /* Save result, and finish up. */
3867 if (epartIsReg(modrm
)) {
3868 putIRegE(sz
, pfx
, modrm
, mkexpr(dst1
));
3869 if (vex_traceflags
& VEX_TRACE_FE
) {
3871 nameGrp2(gregLO3ofRM(modrm
)), nameISize(sz
) );
3873 vex_printf("%s", shift_expr_txt
);
3875 ppIRExpr(shift_expr
);
3876 vex_printf(", %s\n", nameIRegE(sz
,pfx
,modrm
));
3879 storeLE(mkexpr(addr
), mkexpr(dst1
));
3880 if (vex_traceflags
& VEX_TRACE_FE
) {
3882 nameGrp2(gregLO3ofRM(modrm
)), nameISize(sz
) );
3884 vex_printf("%s", shift_expr_txt
);
3886 ppIRExpr(shift_expr
);
3887 vex_printf(", %s\n", dis_buf
);
3894 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
3896 ULong
dis_Grp8_Imm ( const VexAbiInfo
* vbi
,
3898 Long delta
, UChar modrm
,
3899 Int am_sz
, Int sz
, ULong src_val
,
3902 /* src_val denotes a d8.
3903 And delta on entry points at the modrm byte. */
3905 IRType ty
= szToITy(sz
);
3906 IRTemp t2
= newTemp(Ity_I64
);
3907 IRTemp t2m
= newTemp(Ity_I64
);
3908 IRTemp t_addr
= IRTemp_INVALID
;
3912 /* we're optimists :-) */
3915 /* Check whether F2 or F3 are acceptable. */
3916 if (epartIsReg(modrm
)) {
3917 /* F2 or F3 are not allowed in the register case. */
3918 if (haveF2orF3(pfx
)) {
3923 /* F2 or F3 (but not both) are allowable provided LOCK is also
3925 if (haveF2orF3(pfx
)) {
3926 if (haveF2andF3(pfx
) || !haveLOCK(pfx
)) {
3933 /* Limit src_val -- the bit offset -- to something within a word.
3934 The Intel docs say that literal offsets larger than a word are
3935 masked in this way. */
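   /* For example (illustrative): "btq $70, %rax" behaves like
      "btq $6, %rax", since 70 & 63 == 6; likewise a d8 of 35 applied to
      a 16-bit operand tests bit 35 & 15 == 3. */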
3937 case 2: src_val
&= 15; break;
3938 case 4: src_val
&= 31; break;
3939 case 8: src_val
&= 63; break;
3940 default: *decode_OK
= False
; return delta
;
3943 /* Invent a mask suitable for the operation. */
3944 switch (gregLO3ofRM(modrm
)) {
3945 case 4: /* BT */ mask
= 0; break;
3946 case 5: /* BTS */ mask
= 1ULL << src_val
; break;
3947 case 6: /* BTR */ mask
= ~(1ULL << src_val
); break;
3948 case 7: /* BTC */ mask
= 1ULL << src_val
; break;
3949 /* If this needs to be extended, probably simplest to make a
3950 new function to handle the other cases (0 .. 3). The
3951 Intel docs do however not indicate any use for 0 .. 3, so
3952 we don't expect this to happen. */
3953 default: *decode_OK
= False
; return delta
;
3956 /* Fetch the value to be tested and modified into t2, which is
3957 64-bits wide regardless of sz. */
3958 if (epartIsReg(modrm
)) {
3959 vassert(am_sz
== 1);
3960 assign( t2
, widenUto64(getIRegE(sz
, pfx
, modrm
)) );
3961 delta
+= (am_sz
+ 1);
3962 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm
)),
3964 src_val
, nameIRegE(sz
,pfx
,modrm
));
3967 t_addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, 1 );
3969 assign( t2
, widenUto64(loadLE(ty
, mkexpr(t_addr
))) );
3970 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm
)),
3975 /* Compute the new value into t2m, if non-BT. */
3976 switch (gregLO3ofRM(modrm
)) {
3980 assign( t2m
, binop(Iop_Or64
, mkU64(mask
), mkexpr(t2
)) );
3983 assign( t2m
, binop(Iop_And64
, mkU64(mask
), mkexpr(t2
)) );
3986 assign( t2m
, binop(Iop_Xor64
, mkU64(mask
), mkexpr(t2
)) );
3989 /*NOTREACHED*/ /*the previous switch guards this*/
3993 /* Write the result back, if non-BT. */
3994 if (gregLO3ofRM(modrm
) != 4 /* BT */) {
3995 if (epartIsReg(modrm
)) {
3996 putIRegE(sz
, pfx
, modrm
, narrowTo(ty
, mkexpr(t2m
)));
3998 if (haveLOCK(pfx
)) {
3999 casLE( mkexpr(t_addr
),
4000 narrowTo(ty
, mkexpr(t2
))/*expd*/,
4001 narrowTo(ty
, mkexpr(t2m
))/*new*/,
4002 guest_RIP_curr_instr
);
4004 storeLE(mkexpr(t_addr
), narrowTo(ty
, mkexpr(t2m
)));
4009 /* Copy relevant bit from t2 into the carry flag. */
4010 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
4011 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
4012 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
4016 binop(Iop_Shr64
, mkexpr(t2
), mkU8(src_val
)),
4019 /* Set NDEP even though it isn't used. This makes redundant-PUT
4020 elimination of previous stores to this field work better. */
4021 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
4027 /* Signed/unsigned widening multiply. Generate IR to multiply the
4028 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
4029 RDX:RAX/EDX:EAX/DX:AX/AX.
4031 static void codegen_mulL_A_D ( Int sz
, Bool syned
,
4032 IRTemp tmp
, const HChar
* tmp_txt
)
4034 IRType ty
= szToITy(sz
);
4035 IRTemp t1
= newTemp(ty
);
4037 assign( t1
, getIRegRAX(sz
) );
4041 IRTemp res128
= newTemp(Ity_I128
);
4042 IRTemp resHi
= newTemp(Ity_I64
);
4043 IRTemp resLo
= newTemp(Ity_I64
);
4044 IROp mulOp
= syned
? Iop_MullS64
: Iop_MullU64
;
4045 UInt tBaseOp
= syned
? AMD64G_CC_OP_SMULB
: AMD64G_CC_OP_UMULB
;
4046 setFlags_MUL ( Ity_I64
, t1
, tmp
, tBaseOp
);
4047 assign( res128
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
4048 assign( resHi
, unop(Iop_128HIto64
,mkexpr(res128
)));
4049 assign( resLo
, unop(Iop_128to64
,mkexpr(res128
)));
4050 putIReg64(R_RDX
, mkexpr(resHi
));
4051 putIReg64(R_RAX
, mkexpr(resLo
));
4055 IRTemp res64
= newTemp(Ity_I64
);
4056 IRTemp resHi
= newTemp(Ity_I32
);
4057 IRTemp resLo
= newTemp(Ity_I32
);
4058 IROp mulOp
= syned
? Iop_MullS32
: Iop_MullU32
;
4059 UInt tBaseOp
= syned
? AMD64G_CC_OP_SMULB
: AMD64G_CC_OP_UMULB
;
4060 setFlags_MUL ( Ity_I32
, t1
, tmp
, tBaseOp
);
4061 assign( res64
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
4062 assign( resHi
, unop(Iop_64HIto32
,mkexpr(res64
)));
4063 assign( resLo
, unop(Iop_64to32
,mkexpr(res64
)));
4064 putIRegRDX(4, mkexpr(resHi
));
4065 putIRegRAX(4, mkexpr(resLo
));
4069 IRTemp res32
= newTemp(Ity_I32
);
4070 IRTemp resHi
= newTemp(Ity_I16
);
4071 IRTemp resLo
= newTemp(Ity_I16
);
4072 IROp mulOp
= syned
? Iop_MullS16
: Iop_MullU16
;
4073 UInt tBaseOp
= syned
? AMD64G_CC_OP_SMULB
: AMD64G_CC_OP_UMULB
;
4074 setFlags_MUL ( Ity_I16
, t1
, tmp
, tBaseOp
);
4075 assign( res32
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
4076 assign( resHi
, unop(Iop_32HIto16
,mkexpr(res32
)));
4077 assign( resLo
, unop(Iop_32to16
,mkexpr(res32
)));
4078 putIRegRDX(2, mkexpr(resHi
));
4079 putIRegRAX(2, mkexpr(resLo
));
4083 IRTemp res16
= newTemp(Ity_I16
);
4084 IRTemp resHi
= newTemp(Ity_I8
);
4085 IRTemp resLo
= newTemp(Ity_I8
);
4086 IROp mulOp
= syned
? Iop_MullS8
: Iop_MullU8
;
4087 UInt tBaseOp
= syned
? AMD64G_CC_OP_SMULB
: AMD64G_CC_OP_UMULB
;
4088 setFlags_MUL ( Ity_I8
, t1
, tmp
, tBaseOp
);
4089 assign( res16
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
4090 assign( resHi
, unop(Iop_16HIto8
,mkexpr(res16
)));
4091 assign( resLo
, unop(Iop_16to8
,mkexpr(res16
)));
4092 putIRegRAX(2, mkexpr(res16
));
4097 vpanic("codegen_mulL_A_D(amd64)");
4099 DIP("%s%c %s\n", syned
? "imul" : "mul", nameISize(sz
), tmp_txt
);
4103 /* Group 3 extended opcodes. We have to decide here whether F2 and F3
4106 ULong
dis_Grp3 ( const VexAbiInfo
* vbi
,
4107 Prefix pfx
, Int sz
, Long delta
, Bool
* decode_OK
)
4114 IRType ty
= szToITy(sz
);
4115 IRTemp t1
= newTemp(ty
);
4116 IRTemp dst1
, src
, dst0
;
4118 modrm
= getUChar(delta
);
4119 if (epartIsReg(modrm
)) {
4120 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4121 if (haveF2orF3(pfx
)) goto unhandled
;
4122 switch (gregLO3ofRM(modrm
)) {
4123 case 0: { /* TEST */
4125 d64
= getSDisp(imin(4,sz
), delta
);
4126 delta
+= imin(4,sz
);
4128 assign(dst1
, binop(mkSizedOp(ty
,Iop_And8
),
4129 getIRegE(sz
,pfx
,modrm
),
4130 mkU(ty
, d64
& mkSizeMask(sz
))));
4131 setFlags_DEP1( Iop_And8
, dst1
, ty
);
4132 DIP("test%c $%lld, %s\n",
4134 nameIRegE(sz
, pfx
, modrm
));
4142 putIRegE(sz
, pfx
, modrm
,
4143 unop(mkSizedOp(ty
,Iop_Not8
),
4144 getIRegE(sz
, pfx
, modrm
)));
4145 DIP("not%c %s\n", nameISize(sz
),
4146 nameIRegE(sz
, pfx
, modrm
));
4153 assign(dst0
, mkU(ty
,0));
4154 assign(src
, getIRegE(sz
, pfx
, modrm
));
4155 assign(dst1
, binop(mkSizedOp(ty
,Iop_Sub8
), mkexpr(dst0
),
4157 setFlags_DEP1_DEP2(Iop_Sub8
, dst0
, src
, ty
);
4158 putIRegE(sz
, pfx
, modrm
, mkexpr(dst1
));
4159 DIP("neg%c %s\n", nameISize(sz
), nameIRegE(sz
, pfx
, modrm
));
4161 case 4: /* MUL (unsigned widening) */
4164 assign(src
, getIRegE(sz
,pfx
,modrm
));
4165 codegen_mulL_A_D ( sz
, False
, src
,
4166 nameIRegE(sz
,pfx
,modrm
) );
4168 case 5: /* IMUL (signed widening) */
4171 assign(src
, getIRegE(sz
,pfx
,modrm
));
4172 codegen_mulL_A_D ( sz
, True
, src
,
4173 nameIRegE(sz
,pfx
,modrm
) );
4177 assign( t1
, getIRegE(sz
, pfx
, modrm
) );
4178 codegen_div ( sz
, t1
, False
);
4179 DIP("div%c %s\n", nameISize(sz
),
4180 nameIRegE(sz
, pfx
, modrm
));
4184 assign( t1
, getIRegE(sz
, pfx
, modrm
) );
4185 codegen_div ( sz
, t1
, True
);
4186 DIP("idiv%c %s\n", nameISize(sz
),
4187 nameIRegE(sz
, pfx
, modrm
));
4191 vpanic("Grp3(amd64,R)");
4194 /* Decide if F2/XACQ or F3/XREL might be valid. */
4195 Bool validF2orF3
= haveF2orF3(pfx
) ? False
: True
;
4196 if ((gregLO3ofRM(modrm
) == 3/*NEG*/ || gregLO3ofRM(modrm
) == 2/*NOT*/)
4197 && haveF2orF3(pfx
) && !haveF2andF3(pfx
) && haveLOCK(pfx
)) {
4200 if (!validF2orF3
) goto unhandled
;
4202 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
,
4203 /* we have to inform disAMode of any immediate
4205 gregLO3ofRM(modrm
)==0/*TEST*/
4211 assign(t1
, loadLE(ty
,mkexpr(addr
)));
4212 switch (gregLO3ofRM(modrm
)) {
4213 case 0: { /* TEST */
4214 d64
= getSDisp(imin(4,sz
), delta
);
4215 delta
+= imin(4,sz
);
4217 assign(dst1
, binop(mkSizedOp(ty
,Iop_And8
),
4219 mkU(ty
, d64
& mkSizeMask(sz
))));
4220 setFlags_DEP1( Iop_And8
, dst1
, ty
);
4221 DIP("test%c $%lld, %s\n", nameISize(sz
), d64
, dis_buf
);
4229 assign(dst1
, unop(mkSizedOp(ty
,Iop_Not8
), mkexpr(t1
)));
4230 if (haveLOCK(pfx
)) {
4231 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(dst1
)/*new*/,
4232 guest_RIP_curr_instr
);
4234 storeLE( mkexpr(addr
), mkexpr(dst1
) );
4236 DIP("not%c %s\n", nameISize(sz
), dis_buf
);
4242 assign(dst0
, mkU(ty
,0));
4243 assign(src
, mkexpr(t1
));
4244 assign(dst1
, binop(mkSizedOp(ty
,Iop_Sub8
), mkexpr(dst0
),
4246 if (haveLOCK(pfx
)) {
4247 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(dst1
)/*new*/,
4248 guest_RIP_curr_instr
);
4250 storeLE( mkexpr(addr
), mkexpr(dst1
) );
4252 setFlags_DEP1_DEP2(Iop_Sub8
, dst0
, src
, ty
);
4253 DIP("neg%c %s\n", nameISize(sz
), dis_buf
);
4255 case 4: /* MUL (unsigned widening) */
4256 codegen_mulL_A_D ( sz
, False
, t1
, dis_buf
);
4259 codegen_mulL_A_D ( sz
, True
, t1
, dis_buf
);
4262 codegen_div ( sz
, t1
, False
);
4263 DIP("div%c %s\n", nameISize(sz
), dis_buf
);
4266 codegen_div ( sz
, t1
, True
);
4267 DIP("idiv%c %s\n", nameISize(sz
), dis_buf
);
4271 vpanic("Grp3(amd64,M)");
4281 /* Group 4 extended opcodes. We have to decide here whether F2 and F3
4284 ULong
dis_Grp4 ( const VexAbiInfo
* vbi
,
4285 Prefix pfx
, Long delta
, Bool
* decode_OK
)
4291 IRTemp t1
= newTemp(ty
);
4292 IRTemp t2
= newTemp(ty
);
4296 modrm
= getUChar(delta
);
4297 if (epartIsReg(modrm
)) {
4298 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4299 if (haveF2orF3(pfx
)) goto unhandled
;
4300 assign(t1
, getIRegE(1, pfx
, modrm
));
4301 switch (gregLO3ofRM(modrm
)) {
4303 assign(t2
, binop(Iop_Add8
, mkexpr(t1
), mkU8(1)));
4304 putIRegE(1, pfx
, modrm
, mkexpr(t2
));
4305 setFlags_INC_DEC( True
, t2
, ty
);
4308 assign(t2
, binop(Iop_Sub8
, mkexpr(t1
), mkU8(1)));
4309 putIRegE(1, pfx
, modrm
, mkexpr(t2
));
4310 setFlags_INC_DEC( False
, t2
, ty
);
4317 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm
)),
4318 nameIRegE(1, pfx
, modrm
));
4320 /* Decide if F2/XACQ or F3/XREL might be valid. */
4321 Bool validF2orF3
= haveF2orF3(pfx
) ? False
: True
;
4322 if ((gregLO3ofRM(modrm
) == 0/*INC*/ || gregLO3ofRM(modrm
) == 1/*DEC*/)
4323 && haveF2orF3(pfx
) && !haveF2andF3(pfx
) && haveLOCK(pfx
)) {
4326 if (!validF2orF3
) goto unhandled
;
4328 IRTemp addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
4329 assign( t1
, loadLE(ty
, mkexpr(addr
)) );
4330 switch (gregLO3ofRM(modrm
)) {
4332 assign(t2
, binop(Iop_Add8
, mkexpr(t1
), mkU8(1)));
4333 if (haveLOCK(pfx
)) {
4334 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(t2
)/*new*/,
4335 guest_RIP_curr_instr
);
4337 storeLE( mkexpr(addr
), mkexpr(t2
) );
4339 setFlags_INC_DEC( True
, t2
, ty
);
4342 assign(t2
, binop(Iop_Sub8
, mkexpr(t1
), mkU8(1)));
4343 if (haveLOCK(pfx
)) {
4344 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(t2
)/*new*/,
4345 guest_RIP_curr_instr
);
4347 storeLE( mkexpr(addr
), mkexpr(t2
) );
4349 setFlags_INC_DEC( False
, t2
, ty
);
4356 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm
)), dis_buf
);
4365 /* Group 5 extended opcodes. We have to decide here whether F2 and F3
4368 ULong
dis_Grp5 ( const VexAbiInfo
* vbi
,
4369 Prefix pfx
, Int sz
, Long delta
,
4370 /*MOD*/DisResult
* dres
, /*OUT*/Bool
* decode_OK
)
4375 IRTemp addr
= IRTemp_INVALID
;
4376 IRType ty
= szToITy(sz
);
4377 IRTemp t1
= newTemp(ty
);
4378 IRTemp t2
= IRTemp_INVALID
;
4379 IRTemp t3
= IRTemp_INVALID
;
4384 modrm
= getUChar(delta
);
4385 if (epartIsReg(modrm
)) {
4386 /* F2/XACQ and F3/XREL are always invalid in the non-mem case.
4387 F2/CALL and F2/JMP may have bnd prefix. */
4390 && (gregLO3ofRM(modrm
) == 2 || gregLO3ofRM(modrm
) == 4)))
4392 assign(t1
, getIRegE(sz
,pfx
,modrm
));
4393 switch (gregLO3ofRM(modrm
)) {
4396 assign(t2
, binop(mkSizedOp(ty
,Iop_Add8
),
4397 mkexpr(t1
), mkU(ty
,1)));
4398 setFlags_INC_DEC( True
, t2
, ty
);
4399 putIRegE(sz
,pfx
,modrm
, mkexpr(t2
));
4403 assign(t2
, binop(mkSizedOp(ty
,Iop_Sub8
),
4404 mkexpr(t1
), mkU(ty
,1)));
4405 setFlags_INC_DEC( False
, t2
, ty
);
4406 putIRegE(sz
,pfx
,modrm
, mkexpr(t2
));
4408 case 2: /* call Ev */
4409 /* Ignore any sz value and operate as if sz==8. */
4410 if (!(sz
== 4 || sz
== 8)) goto unhandledR
;
4411 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
4413 t3
= newTemp(Ity_I64
);
4414 assign(t3
, getIRegE(sz
,pfx
,modrm
));
4415 t2
= newTemp(Ity_I64
);
4416 assign(t2
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(8)));
4417 putIReg64(R_RSP
, mkexpr(t2
));
4418 storeLE( mkexpr(t2
), mkU64(guest_RIP_bbstart
+delta
+1));
4419 make_redzone_AbiHint(vbi
, t2
, t3
/*nia*/, "call-Ev(reg)");
4420 jmp_treg(dres
, Ijk_Call
, t3
);
4421 vassert(dres
->whatNext
== Dis_StopHere
);
4424 case 4: /* jmp Ev */
4425 /* Ignore any sz value and operate as if sz==8. */
4426 if (!(sz
== 4 || sz
== 8)) goto unhandledR
;
4427 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
4429 t3
= newTemp(Ity_I64
);
4430 assign(t3
, getIRegE(sz
,pfx
,modrm
));
4431 jmp_treg(dres
, Ijk_Boring
, t3
);
4432 vassert(dres
->whatNext
== Dis_StopHere
);
4435 case 6: /* PUSH Ev */
4436 /* There is no encoding for 32-bit operand size; hence ... */
4437 if (sz
== 4) sz
= 8;
4438 if (sz
== 8 || sz
== 2) {
4439 ty
= szToITy(sz
); /* redo it, since sz might have changed */
4441 assign(t3
, getIRegE(sz
,pfx
,modrm
));
4442 t2
= newTemp(Ity_I64
);
4443 assign( t2
, binop(Iop_Sub64
,getIReg64(R_RSP
),mkU64(sz
)) );
4444 putIReg64(R_RSP
, mkexpr(t2
) );
4445 storeLE( mkexpr(t2
), mkexpr(t3
) );
4448 goto unhandledR
; /* awaiting test case */
4456 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm
)),
4457 showSz
? nameISize(sz
) : ' ',
4458 nameIRegE(sz
, pfx
, modrm
));
4460 /* Decide if F2/XACQ, F3/XREL, F2/CALL or F2/JMP might be valid. */
4461 Bool validF2orF3
= haveF2orF3(pfx
) ? False
: True
;
4462 if ((gregLO3ofRM(modrm
) == 0/*INC*/ || gregLO3ofRM(modrm
) == 1/*DEC*/)
4463 && haveF2orF3(pfx
) && !haveF2andF3(pfx
) && haveLOCK(pfx
)) {
4465 } else if ((gregLO3ofRM(modrm
) == 2 || gregLO3ofRM(modrm
) == 4)
4466 && (haveF2(pfx
) && !haveF3(pfx
))) {
4469 if (!validF2orF3
) goto unhandledM
;
4471 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
4472 if (gregLO3ofRM(modrm
) != 2 && gregLO3ofRM(modrm
) != 4
4473 && gregLO3ofRM(modrm
) != 6) {
4474 assign(t1
, loadLE(ty
,mkexpr(addr
)));
4476 switch (gregLO3ofRM(modrm
)) {
4479 assign(t2
, binop(mkSizedOp(ty
,Iop_Add8
),
4480 mkexpr(t1
), mkU(ty
,1)));
4481 if (haveLOCK(pfx
)) {
4482 casLE( mkexpr(addr
),
4483 mkexpr(t1
), mkexpr(t2
), guest_RIP_curr_instr
);
4485 storeLE(mkexpr(addr
),mkexpr(t2
));
4487 setFlags_INC_DEC( True
, t2
, ty
);
4491 assign(t2
, binop(mkSizedOp(ty
,Iop_Sub8
),
4492 mkexpr(t1
), mkU(ty
,1)));
4493 if (haveLOCK(pfx
)) {
4494 casLE( mkexpr(addr
),
4495 mkexpr(t1
), mkexpr(t2
), guest_RIP_curr_instr
);
4497 storeLE(mkexpr(addr
),mkexpr(t2
));
4499 setFlags_INC_DEC( False
, t2
, ty
);
4501 case 2: /* call Ev */
4502 /* Ignore any sz value and operate as if sz==8. */
4503 if (!(sz
== 4 || sz
== 8)) goto unhandledM
;
4504 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
4506 t3
= newTemp(Ity_I64
);
4507 assign(t3
, loadLE(Ity_I64
,mkexpr(addr
)));
4508 t2
= newTemp(Ity_I64
);
4509 assign(t2
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(8)));
4510 putIReg64(R_RSP
, mkexpr(t2
));
4511 storeLE( mkexpr(t2
), mkU64(guest_RIP_bbstart
+delta
+len
));
4512 make_redzone_AbiHint(vbi
, t2
, t3
/*nia*/, "call-Ev(mem)");
4513 jmp_treg(dres
, Ijk_Call
, t3
);
4514 vassert(dres
->whatNext
== Dis_StopHere
);
4517 case 4: /* JMP Ev */
4518 /* Ignore any sz value and operate as if sz==8. */
4519 if (!(sz
== 4 || sz
== 8)) goto unhandledM
;
4520 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
4522 t3
= newTemp(Ity_I64
);
4523 assign(t3
, loadLE(Ity_I64
,mkexpr(addr
)));
4524 jmp_treg(dres
, Ijk_Boring
, t3
);
4525 vassert(dres
->whatNext
== Dis_StopHere
);
4528 case 6: /* PUSH Ev */
4529 /* There is no encoding for 32-bit operand size; hence ... */
4530 if (sz
== 4) sz
= 8;
4531 if (sz
== 8 || sz
== 2) {
4532 ty
= szToITy(sz
); /* redo it, since sz might have changed */
4534 assign(t3
, loadLE(ty
,mkexpr(addr
)));
4535 t2
= newTemp(Ity_I64
);
4536 assign( t2
, binop(Iop_Sub64
,getIReg64(R_RSP
),mkU64(sz
)) );
4537 putIReg64(R_RSP
, mkexpr(t2
) );
4538 storeLE( mkexpr(t2
), mkexpr(t3
) );
4541 goto unhandledM
; /* awaiting test case */
4549 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm
)),
4550 showSz
? nameISize(sz
) : ' ',
/*------------------------------------------------------------*/
/*--- Disassembling string ops (including REP prefixes)    ---*/
/*------------------------------------------------------------*/

/* Code shared by all the string ops */
static
void dis_string_op_increment ( Int sz, IRTemp t_inc )
{
   UChar logSz;
   if (sz == 8 || sz == 4 || sz == 2) {
      logSz = 1;
      if (sz == 4) logSz = 2;
      if (sz == 8) logSz = 3;
      assign( t_inc, 
              binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
                               mkU8(logSz) ) );
   } else {
      assign( t_inc, 
              IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
   }
}

static
void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ),
                    Int sz, const HChar* name, Prefix pfx )
{
   IRTemp t_inc = newTemp(Ity_I64);
   /* Really we ought to inspect the override prefixes, but we don't.
      The following assertion catches any resulting sillyness. */
   vassert(pfx == clearSegBits(pfx));
   dis_string_op_increment(sz, t_inc);
   dis_OP( sz, t_inc, pfx );
   DIP("%s%c\n", name, nameISize(sz));
}
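
/* For illustration: dis_string_op_increment computes the per-iteration
   pointer step as guest_DFLAG << log2(sz).  The guest state stores
   DFLAG as +1 (CLD) or -1 (STD), so e.g. sz == 4 with DF set yields
   t_inc = (-1) << 2 = -4, i.e. the string ops walk downwards by one
   element per iteration. */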
static 
void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty = szToITy(sz);
   IRTemp td = newTemp(Ity_I64);   /* RDI */
   IRTemp ts = newTemp(Ity_I64);   /* RSI */
   IRExpr *incd, *incs;

   if (haveASO(pfx)) {
      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
      assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
   } else {
      assign( td, getIReg64(R_RDI) );
      assign( ts, getIReg64(R_RSI) );
   }

   storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );

   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
   if (haveASO(pfx)) {
      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
      incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
   }
   putIReg64( R_RDI, incd );
   putIReg64( R_RSI, incs );
}
static 
void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty = szToITy(sz);
   IRTemp ts = newTemp(Ity_I64);   /* RSI */
   IRExpr *incs;

   if (haveASO(pfx))
      assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
   else
      assign( ts, getIReg64(R_RSI) );

   putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );

   incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
   if (haveASO(pfx))
      incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
   putIReg64( R_RSI, incs );
}
static 
void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty = szToITy(sz);
   IRTemp ta = newTemp(ty);        /* rAX */
   IRTemp td = newTemp(Ity_I64);   /* RDI */
   IRExpr *incd;

   assign( ta, getIRegRAX(sz) );

   if (haveASO(pfx))
      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
   else
      assign( td, getIReg64(R_RDI) );

   storeLE( mkexpr(td), mkexpr(ta) );

   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   if (haveASO(pfx))
      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
   putIReg64( R_RDI, incd );
}
static 
void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty  = szToITy(sz);
   IRTemp tdv = newTemp(ty);       /* (RDI) */
   IRTemp tsv = newTemp(ty);       /* (RSI) */
   IRTemp td  = newTemp(Ity_I64);  /*  RDI  */
   IRTemp ts  = newTemp(Ity_I64);  /*  RSI  */
   IRExpr *incd, *incs;

   if (haveASO(pfx)) {
      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
      assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
   } else {
      assign( td, getIReg64(R_RDI) );
      assign( ts, getIReg64(R_RSI) );
   }

   assign( tdv, loadLE(ty,mkexpr(td)) );

   assign( tsv, loadLE(ty,mkexpr(ts)) );

   setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );

   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
   if (haveASO(pfx)) {
      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
      incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
   }
   putIReg64( R_RDI, incd );
   putIReg64( R_RSI, incs );
}
static 
void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx )
{
   IRType ty  = szToITy(sz);
   IRTemp ta  = newTemp(ty);       /*  rAX  */
   IRTemp td  = newTemp(Ity_I64);  /*  RDI  */
   IRTemp tdv = newTemp(ty);       /* (RDI) */
   IRExpr *incd;

   assign( ta, getIRegRAX(sz) );

   if (haveASO(pfx))
      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
   else
      assign( td, getIReg64(R_RDI) );

   assign( tdv, loadLE(ty,mkexpr(td)) );

   setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );

   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
   if (haveASO(pfx))
      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
   putIReg64( R_RDI, incd );
}
/* Wrap the appropriate string op inside a REP/REPE/REPNE.  We assume
   the insn is the last one in the basic block, and so emit a jump to
   the next insn, rather than just falling through. */
static 
void dis_REP_op ( /*MOD*/DisResult* dres,
                  AMD64Condcode cond,
                  void (*dis_OP)(Int, IRTemp, Prefix),
                  Int sz, Addr64 rip, Addr64 rip_next, const HChar* name,
                  Prefix pfx )
{
   IRTemp t_inc = newTemp(Ity_I64);
   IRTemp tc;
   IRExpr* cmp;

   /* Really we ought to inspect the override prefixes, but we don't.
      The following assertion catches any resulting sillyness. */
   vassert(pfx == clearSegBits(pfx));

   if (haveASO(pfx)) {
      tc = newTemp(Ity_I32);  /*  ECX  */
      assign( tc, getIReg32(R_RCX) );
      cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0));
   } else {
      tc = newTemp(Ity_I64);  /*  RCX  */
      assign( tc, getIReg64(R_RCX) );
      cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0));
   }

   stmt( IRStmt_Exit( cmp, Ijk_Boring,
                      IRConst_U64(rip_next), OFFB_RIP ) );

   if (haveASO(pfx))
      putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
   else
      putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );

   dis_string_op_increment(sz, t_inc);
   dis_OP (sz, t_inc, pfx);

   if (cond == AMD64CondAlways) {
      jmp_lit(dres, Ijk_Boring, rip);
      vassert(dres->whatNext == Dis_StopHere);
   } else {
      stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
                         Ijk_Boring,
                         IRConst_U64(rip),
                         OFFB_RIP ) );
      jmp_lit(dres, Ijk_Boring, rip_next);
      vassert(dres->whatNext == Dis_StopHere);
   }
   DIP("%s%c\n", name, nameISize(sz));
}
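
/* For illustration: dis_REP_op does not build an IR-level loop.  Each
   translation of a REP-prefixed string insn performs at most one
   iteration: exit to the next insn if RCX is already zero, decrement
   RCX, do the body once, then either jump back to the same insn
   (plain REP / AMD64CondAlways) or conditionally exit on the
   REPE/REPNE condition.  Valgrind's dispatcher re-enters the same
   translation for the remaining iterations. */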
/*------------------------------------------------------------*/
/*--- Arithmetic, etc.                                      ---*/
/*------------------------------------------------------------*/

/* IMUL E, G.  Supplied eip points to the modR/M byte. */
static
ULong dis_mul_E_G ( const VexAbiInfo* vbi,
                    Prefix      pfx,
                    Int         size, 
                    Long        delta0 )
{
   Int    alen;
   HChar  dis_buf[50];
   UChar  rm    = getUChar(delta0);
   IRType ty    = szToITy(size);
   IRTemp te    = newTemp(ty);
   IRTemp tg    = newTemp(ty);
   IRTemp resLo = newTemp(ty);

   assign( tg, getIRegG(size, pfx, rm) );
   if (epartIsReg(rm)) {
      assign( te, getIRegE(size, pfx, rm) );
   } else {
      IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 );
      assign( te, loadLE(ty,mkexpr(addr)) );
   }

   setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB );

   assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );

   putIRegG(size, pfx, rm, mkexpr(resLo) );

   if (epartIsReg(rm)) {
      DIP("imul%c %s, %s\n", nameISize(size), 
                             nameIRegE(size,pfx,rm),
                             nameIRegG(size,pfx,rm));
      return delta0+1;
   } else {
      DIP("imul%c %s, %s\n", nameISize(size), 
                             dis_buf, 
                             nameIRegG(size,pfx,rm));
      return delta0+alen;
   }
}
/* IMUL I * E -> G.  Supplied rip points to the modR/M byte. */
static
ULong dis_imul_I_E_G ( const VexAbiInfo* vbi,
                       Prefix      pfx,
                       Int         size, 
                       Long        delta,
                       Int         litsize )
{
   Long   d64;
   Int    alen;
   HChar  dis_buf[50];
   UChar  rm    = getUChar(delta);
   IRType ty    = szToITy(size);
   IRTemp te    = newTemp(ty);
   IRTemp tl    = newTemp(ty);
   IRTemp resLo = newTemp(ty);

   vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8);

   if (epartIsReg(rm)) {
      assign(te, getIRegE(size, pfx, rm));
      delta++;
   } else {
      IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 
                                     imin(4,litsize) );
      assign(te, loadLE(ty, mkexpr(addr)));
      delta += alen;
   }
   d64 = getSDisp(imin(4,litsize),delta);
   delta += imin(4,litsize);

   d64 &= mkSizeMask(size);
   assign(tl, mkU(ty,d64));

   assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));

   setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB );

   putIRegG(size, pfx, rm, mkexpr(resLo));

   DIP("imul%c $%lld, %s, %s\n", 
       nameISize(size), d64, 
       ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ),
       nameIRegG(size,pfx,rm) );
   return delta;
}
/* Generate an IR sequence to do a popcount operation on the supplied
   IRTemp, and return a new IRTemp holding the result.  'ty' may be
   Ity_I16, Ity_I32 or Ity_I64 only. */
static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src )
{
   Int i;
   if (ty == Ity_I16) {
      IRTemp old = IRTemp_INVALID;
      IRTemp nyu = IRTemp_INVALID;
      IRTemp mask[4], shift[4];
      for (i = 0; i < 4; i++) {
         mask[i]  = newTemp(ty);
         shift[i] = 1 << i;
      }
      assign(mask[0], mkU16(0x5555));
      assign(mask[1], mkU16(0x3333));
      assign(mask[2], mkU16(0x0F0F));
      assign(mask[3], mkU16(0x00FF));
      old = src;
      for (i = 0; i < 4; i++) {
         nyu = newTemp(ty);
         assign(nyu,
                binop(Iop_Add16, 
                      binop(Iop_And16,
                            mkexpr(old),
                            mkexpr(mask[i])),
                      binop(Iop_And16,
                            binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])),
                            mkexpr(mask[i]))));
         old = nyu;
      }
      return nyu;
   }
   if (ty == Ity_I32) {
      IRTemp old = IRTemp_INVALID;
      IRTemp nyu = IRTemp_INVALID;
      IRTemp mask[5], shift[5];
      for (i = 0; i < 5; i++) {
         mask[i]  = newTemp(ty);
         shift[i] = 1 << i;
      }
      assign(mask[0], mkU32(0x55555555));
      assign(mask[1], mkU32(0x33333333));
      assign(mask[2], mkU32(0x0F0F0F0F));
      assign(mask[3], mkU32(0x00FF00FF));
      assign(mask[4], mkU32(0x0000FFFF));
      old = src;
      for (i = 0; i < 5; i++) {
         nyu = newTemp(ty);
         assign(nyu,
                binop(Iop_Add32, 
                      binop(Iop_And32,
                            mkexpr(old),
                            mkexpr(mask[i])),
                      binop(Iop_And32,
                            binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
                            mkexpr(mask[i]))));
         old = nyu;
      }
      return nyu;
   }
   if (ty == Ity_I64) {
      IRTemp old = IRTemp_INVALID;
      IRTemp nyu = IRTemp_INVALID;
      IRTemp mask[6], shift[6];
      for (i = 0; i < 6; i++) {
         mask[i]  = newTemp(ty);
         shift[i] = 1 << i;
      }
      assign(mask[0], mkU64(0x5555555555555555ULL));
      assign(mask[1], mkU64(0x3333333333333333ULL));
      assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL));
      assign(mask[3], mkU64(0x00FF00FF00FF00FFULL));
      assign(mask[4], mkU64(0x0000FFFF0000FFFFULL));
      assign(mask[5], mkU64(0x00000000FFFFFFFFULL));
      old = src;
      for (i = 0; i < 6; i++) {
         nyu = newTemp(ty);
         assign(nyu,
                binop(Iop_Add64, 
                      binop(Iop_And64,
                            mkexpr(old),
                            mkexpr(mask[i])),
                      binop(Iop_And64,
                            binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])),
                            mkexpr(mask[i]))));
         old = nyu;
      }
      return nyu;
   }
   /*NOTREACHED*/
   vassert(0);
}
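
/* For illustration: the mask/shift recurrence above is the usual
   pairwise-sum popcount.  On a byte 0xD6 (binary 11010110) it would go
      0xD6 -> (0xD6 & 0x55) + ((0xD6 >> 1) & 0x55) = 0x95  (2,1,1,1 per pair)
           -> (0x95 & 0x33) + ((0x95 >> 2) & 0x33) = 0x32  (3,2 per nibble)
           -> (0x32 & 0x0F) + ((0x32 >> 4) & 0x0F) = 0x05  (= popcount).
   The 16/32/64-bit variants simply run 4, 5 or 6 such steps. */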
/* Generate an IR sequence to do a count-leading-zeroes operation on
   the supplied IRTemp, and return a new IRTemp holding the result.
   'ty' may be Ity_I16, Ity_I32 or Ity_I64 only.  In the case where
   the argument is zero, return the number of bits in the word (the
   natural semantics). */
static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
{
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);

   IRTemp src64 = newTemp(Ity_I64);
   assign(src64, widenUto64( mkexpr(src) ));

   IRTemp src64x = newTemp(Ity_I64);
   assign(src64x, 
          binop(Iop_Shl64, mkexpr(src64),
                           mkU8(64 - 8 * sizeofIRType(ty))));

   // Clz64 has undefined semantics when its input is zero, so
   // special-case around that.
   IRTemp res64 = newTemp(Ity_I64);
   assign(res64,
          IRExpr_ITE(
             binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0)),
             mkU64(8 * sizeofIRType(ty)),
             unop(Iop_Clz64, mkexpr(src64x))
   ));

   IRTemp res = newTemp(ty);
   assign(res, narrowTo(ty, mkexpr(res64)));
   return res;
}
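
/* For illustration: the pre-shift above is what makes Iop_Clz64 give
   the right answer for narrow types.  E.g. a 16-bit source 0x0001 is
   widened to 0x0000000000000001 and shifted left by 48, giving
   0x0001000000000000, whose Clz64 is 15 -- exactly the 16-bit LZCNT
   result.  Without the shift, Clz64 would also count the 48 extra
   zero bits introduced by widening. */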
/* Generate an IR sequence to do a count-trailing-zeroes operation on
   the supplied IRTemp, and return a new IRTemp holding the result.
   'ty' may be Ity_I16, Ity_I32 or Ity_I64 only.  In the case where
   the argument is zero, return the number of bits in the word (the
   natural semantics). */
static IRTemp gen_TZCNT ( IRType ty, IRTemp src )
{
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);

   IRTemp src64 = newTemp(Ity_I64);
   assign(src64, widenUto64( mkexpr(src) ));

   // Ctz64 has undefined semantics when its input is zero, so
   // special-case around that.
   IRTemp res64 = newTemp(Ity_I64);
   assign(res64,
          IRExpr_ITE(
             binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0)),
             mkU64(8 * sizeofIRType(ty)),
             unop(Iop_Ctz64, mkexpr(src64))
   ));

   IRTemp res = newTemp(ty);
   assign(res, narrowTo(ty, mkexpr(res64)));
   return res;
}
/*------------------------------------------------------------*/
/*--- x87 FLOATING POINT INSTRUCTIONS                      ---*/
/*------------------------------------------------------------*/

/* --- Helper functions for dealing with the register stack. --- */

/* --- Set the emulation-warning pseudo-register. --- */

static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_EMNOTE, e ) );
}

/* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */

static IRExpr* mkQNaN64 ( void )
{
  /* QNaN is 0 2047 1 0(51times) 
     == 0b 11111111111b 1 0(51times)
     == 0x7FF8 0000 0000 0000
   */
   return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
}

/* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */

static IRExpr* get_ftop ( void )
{
   return IRExpr_Get( OFFB_FTOP, Ity_I32 );
}

static void put_ftop ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_FTOP, e ) );
}

/* --------- Get/put the C3210 bits. --------- */

static IRExpr*  /* :: Ity_I64 */ get_C3210 ( void )
{
   return IRExpr_Get( OFFB_FC3210, Ity_I64 );
}

static void put_C3210 ( IRExpr* e  /* :: Ity_I64 */ )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put( OFFB_FC3210, e ) );
}

/* --------- Get/put the FPU rounding mode. --------- */
static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
{
   return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 ));
}

static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) );
}
/* --------- Synthesise a 2-bit FPU rounding mode. --------- */
/* Produces a value in 0 .. 3, which is encoded as per the type
   IRRoundingMode.  Since the guest_FPROUND value is also encoded as
   per IRRoundingMode, we merely need to get it and mask it for
   safety.
*/
static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
{
   return binop( Iop_And32, get_fpround(), mkU32(3) );
}

static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
{
   return mkU32(Irrm_NEAREST);
}


/* --------- Get/set FP register tag bytes. --------- */

/* Given i, and some expression e, generate 'ST_TAG(i) = e'. */

static void put_ST_TAG ( Int i, IRExpr* value )
{
   IRRegArray* descr;
   vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
   descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
}

/* Given i, generate an expression yielding 'ST_TAG(i)'.  This will be
   zero to indicate "Empty" and nonzero to indicate "NonEmpty".  */

static IRExpr* get_ST_TAG ( Int i )
{
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   return IRExpr_GetI( descr, get_ftop(), i );
}
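
/* For illustration: guest_FPROUND is kept in IRRoundingMode encoding
   (0 = nearest, 1 = toward -inf, 2 = toward +inf, 3 = toward zero),
   which matches the ordering of the x87 RC field, so
   get_roundingmode() only needs the And32 with 3 as a safety mask.
   get_FAKE_roundingmode() is the deliberate shortcut used by the many
   XXXROUNDINGFIXME sites: round-to-nearest regardless of RC. */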
/* --------- Get/set FP registers. --------- */

/* Given i, and some expression e, emit 'ST(i) = e' and set the
   register's tag to indicate the register is full.  The previous
   state of the register is not checked. */

static void put_ST_UNCHECKED ( Int i, IRExpr* value )
{
   IRRegArray* descr;
   vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
   descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
   stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
   /* Mark the register as in-use. */
   put_ST_TAG(i, mkU8(1));
}

/* Given i, and some expression e, emit
      ST(i) = is_full(i) ? NaN : e
   and set the tag accordingly.
*/

static void put_ST ( Int i, IRExpr* value )
{
   put_ST_UNCHECKED(
      i,
      IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
                  /* non-0 means full */
                  mkQNaN64(),
                  /* 0 means empty */
                  value
      )
   );
}


/* Given i, generate an expression yielding 'ST(i)'. */

static IRExpr* get_ST_UNCHECKED ( Int i )
{
   IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
   return IRExpr_GetI( descr, get_ftop(), i );
}


/* Given i, generate an expression yielding 
     is_full(i) ? ST(i) : NaN
*/

static IRExpr* get_ST ( Int i )
{
   return
      IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
                  /* non-0 means full */
                  get_ST_UNCHECKED(i),
                  /* 0 means empty */
                  mkQNaN64());
}
/* Given i, and some expression e, and a condition cond, generate IR
   which has the same effect as put_ST(i,e) when cond is true and has
   no effect when cond is false.  Given the lack of proper
   if-then-else in the IR, this is pretty tricky.
*/

static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value )
{
   // new_tag = if cond then FULL else old_tag
   // new_val = if cond then (if old_tag==FULL then NaN else val)
   //                   else old_val

   IRTemp old_tag = newTemp(Ity_I8);
   assign(old_tag, get_ST_TAG(i));
   IRTemp new_tag = newTemp(Ity_I8);
   assign(new_tag,
          IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag)));

   IRTemp old_val = newTemp(Ity_F64);
   assign(old_val, get_ST_UNCHECKED(i));
   IRTemp new_val = newTemp(Ity_F64);
   assign(new_val,
          IRExpr_ITE(mkexpr(cond),
                     IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)),
                                /* non-0 means full */
                                mkQNaN64(),
                                /* 0 means empty */
                                value),
                     mkexpr(old_val)));

   put_ST_UNCHECKED(i, mkexpr(new_val));
   // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL.  So 
   // now set it to new_tag instead.
   put_ST_TAG(i, mkexpr(new_tag));
}
/* Adjust FTOP downwards by one register. */

static void fp_push ( void )
{
   put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
}

/* Adjust FTOP downwards by one register when COND is 1:I1.  Else
   don't change it. */

static void maybe_fp_push ( IRTemp cond )
{
   put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) );
}

/* Adjust FTOP upwards by one register, and mark the vacated register
   as empty.  */

static void fp_pop ( void )
{
   put_ST_TAG(0, mkU8(0));
   put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
}

/* Set the C2 bit of the FPU status register to e[0].  Assumes that
   e[31:1] == 0. 
*/
static void set_C2 ( IRExpr* e )
{
   IRExpr* cleared = binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2));
   put_C3210( binop(Iop_Or64,
                    cleared,
                    binop(Iop_Shl64, e, mkU8(AMD64G_FC_SHIFT_C2))) );
}
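
/* For illustration: the x87 stack is modelled as an 8-entry circular
   guest-state array (OFFB_FPREGS / OFFB_FPTAGS) indexed relative to
   FTOP via GetI/PutI, whose indexing wraps modulo 8.  So fp_push is
   just "FTOP--" and fp_pop is "mark ST(0) empty, FTOP++"; no data is
   ever moved between the eight slots. */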
/* Generate code to check that abs(d64) < 2^63 and is finite.  This is
   used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN.  The
   test is simple, but the derivation of it is not so simple.

   The exponent field for an IEEE754 double is 11 bits.  That means it
   can take values 0 through 0x7FF.  If the exponent has value 0x7FF,
   the number is either a NaN or an Infinity and so is not finite.
   Furthermore, a finite value of exactly 2^63 is the smallest value
   that has exponent value 0x43E.  Hence, what we need to do is
   extract the exponent, ignoring the sign bit and mantissa, and check
   it is < 0x43E, or <= 0x43D.

   To make this easily applicable to 32- and 64-bit targets, a
   roundabout approach is used.  First the number is converted to I64,
   then the top 32 bits are taken.  Shifting them right by 20 bits
   places the sign bit and exponent in the bottom 12 bits.  Anding
   with 0x7FF gets rid of the sign bit, leaving just the exponent
   available for comparison.
*/
static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 )
{
   IRTemp i64 = newTemp(Ity_I64);
   assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) );
   IRTemp exponent = newTemp(Ity_I32);
   assign(exponent,
          binop(Iop_And32,
                binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)),
                mkU32(0x7FF)));
   IRTemp in_range_and_finite = newTemp(Ity_I1);
   assign(in_range_and_finite,
          binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D)));
   return in_range_and_finite;
}
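
/* For illustration: for d64 = 1.0 the I64 reinterpretation is
   0x3FF0000000000000, so (top32 >> 20) & 0x7FF = 0x3FF <= 0x43D and
   the check passes; for an infinity or NaN the field is 0x7FF and it
   fails; for 2^63 (exponent field 0x43E) it also fails, matching the
   |arg| < 2^63 limit of FSIN/FCOS/FSINCOS/FPTAN. */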
/* Invent a plausible-looking FPU status word value:
      ((ftop & 7) << 11) | (c3210 & 0x4700)
 */
static IRExpr* get_FPU_sw ( void )
{
   return
      unop(Iop_32to16,
           binop(Iop_Or32,
                 binop(Iop_Shl32, 
                       binop(Iop_And32, get_ftop(), mkU32(7)), 
                       mkU8(11)),
                 binop(Iop_And32, unop(Iop_64to32, get_C3210()),
                                  mkU32(0x4700))
      ));
}
/* Generate a dirty helper call that initialises the x87 state a la
   FINIT.  If |guard| is NULL, it is done unconditionally.  Otherwise
   |guard| is used as a guarding condition.
*/
static void gen_FINIT_SEQUENCE ( IRExpr* guard )
{
   /* Uses dirty helper: 
         void amd64g_do_FINIT ( VexGuestAMD64State* ) */
   IRDirty* d  = unsafeIRDirty_0_N ( 
                    0/*regparms*/, 
                    "amd64g_dirtyhelper_FINIT", 
                    &amd64g_dirtyhelper_FINIT,
                    mkIRExprVec_1( IRExpr_GSPTR() )
                 );

   /* declare we're writing guest state */
   d->nFxState = 5;
   vex_bzero(&d->fxState, sizeof(d->fxState));

   d->fxState[0].fx     = Ifx_Write;
   d->fxState[0].offset = OFFB_FTOP;
   d->fxState[0].size   = sizeof(UInt);

   d->fxState[1].fx     = Ifx_Write;
   d->fxState[1].offset = OFFB_FPREGS;
   d->fxState[1].size   = 8 * sizeof(ULong);

   d->fxState[2].fx     = Ifx_Write;
   d->fxState[2].offset = OFFB_FPTAGS;
   d->fxState[2].size   = 8 * sizeof(UChar);

   d->fxState[3].fx     = Ifx_Write;
   d->fxState[3].offset = OFFB_FPROUND;
   d->fxState[3].size   = sizeof(ULong);

   d->fxState[4].fx     = Ifx_Write;
   d->fxState[4].offset = OFFB_FC3210;
   d->fxState[4].size   = sizeof(ULong);

   if (guard)
      d->guard = guard;

   stmt( IRStmt_Dirty(d) );
}
5361 /* ------------------------------------------------------- */
5362 /* Given all that stack-mangling junk, we can now go ahead
5363 and describe FP instructions.
5366 /* ST(0) = ST(0) `op` mem64/32(addr)
5367 Need to check ST(0)'s tag on read, but not on write.
5370 void fp_do_op_mem_ST_0 ( IRTemp addr
, const HChar
* op_txt
, HChar
* dis_buf
,
5373 DIP("f%s%c %s\n", op_txt
, dbl
?'l':'s', dis_buf
);
5377 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5379 loadLE(Ity_F64
,mkexpr(addr
))
5384 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5386 unop(Iop_F32toF64
, loadLE(Ity_F32
,mkexpr(addr
)))
5392 /* ST(0) = mem64/32(addr) `op` ST(0)
5393 Need to check ST(0)'s tag on read, but not on write.
5396 void fp_do_oprev_mem_ST_0 ( IRTemp addr
, const HChar
* op_txt
, HChar
* dis_buf
,
5399 DIP("f%s%c %s\n", op_txt
, dbl
?'l':'s', dis_buf
);
5403 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5404 loadLE(Ity_F64
,mkexpr(addr
)),
5410 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5411 unop(Iop_F32toF64
, loadLE(Ity_F32
,mkexpr(addr
))),
5418 /* ST(dst) = ST(dst) `op` ST(src).
5419 Check dst and src tags when reading but not on write.
5422 void fp_do_op_ST_ST ( const HChar
* op_txt
, IROp op
, UInt st_src
, UInt st_dst
,
5425 DIP("f%s%s st(%u), st(%u)\n", op_txt
, pop_after
?"p":"", st_src
, st_dst
);
5429 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5437 /* ST(dst) = ST(src) `op` ST(dst).
5438 Check dst and src tags when reading but not on write.
5441 void fp_do_oprev_ST_ST ( const HChar
* op_txt
, IROp op
, UInt st_src
, UInt st_dst
,
5444 DIP("f%s%s st(%u), st(%u)\n", op_txt
, pop_after
?"p":"", st_src
, st_dst
);
5448 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5456 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */
5457 static void fp_do_ucomi_ST0_STi ( UInt i
, Bool pop_after
)
5459 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after
? "p" : "", i
);
5460 /* This is a bit of a hack (and isn't really right). It sets
5461 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
5462 documentation implies A and S are unchanged.
5464 /* It's also fishy in that it is used both for COMIP and
5465 UCOMIP, and they aren't the same (although similar). */
5466 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
5467 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
5472 binop(Iop_CmpF64
, get_ST(0), get_ST(i
))),
5481 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 )
5483 static IRExpr
* x87ishly_qnarrow_32_to_16 ( IRExpr
* e32
)
5485 IRTemp t32
= newTemp(Ity_I32
);
5491 binop(Iop_Add32
, mkexpr(t32
), mkU32(32768))),
5493 unop(Iop_32to16
, mkexpr(t32
)),
5499 ULong
dis_FPU ( /*OUT*/Bool
* decode_ok
,
5500 const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
)
5507 /* On entry, delta points at the second byte of the insn (the modrm
5509 UChar first_opcode
= getUChar(delta
-1);
5510 UChar modrm
= getUChar(delta
+0);
5512 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
5514 if (first_opcode
== 0xD8) {
5517 /* bits 5,4,3 are an opcode extension, and the modRM also
5518 specifies an address. */
5519 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
5522 switch (gregLO3ofRM(modrm
)) {
5524 case 0: /* FADD single-real */
5525 fp_do_op_mem_ST_0 ( addr
, "add", dis_buf
, Iop_AddF64
, False
);
5528 case 1: /* FMUL single-real */
5529 fp_do_op_mem_ST_0 ( addr
, "mul", dis_buf
, Iop_MulF64
, False
);
5532 case 2: /* FCOM single-real */
5533 DIP("fcoms %s\n", dis_buf
);
5534 /* This forces C1 to zero, which isn't right. */
5535 /* The AMD documentation suggests that forcing C1 to
5536 zero is correct (Eliot Moss) */
5544 loadLE(Ity_F32
,mkexpr(addr
)))),
5550 case 3: /* FCOMP single-real */
5551 /* The AMD documentation suggests that forcing C1 to
5552 zero is correct (Eliot Moss) */
5553 DIP("fcomps %s\n", dis_buf
);
5554 /* This forces C1 to zero, which isn't right. */
5562 loadLE(Ity_F32
,mkexpr(addr
)))),
5569 case 4: /* FSUB single-real */
5570 fp_do_op_mem_ST_0 ( addr
, "sub", dis_buf
, Iop_SubF64
, False
);
5573 case 5: /* FSUBR single-real */
5574 fp_do_oprev_mem_ST_0 ( addr
, "subr", dis_buf
, Iop_SubF64
, False
);
5577 case 6: /* FDIV single-real */
5578 fp_do_op_mem_ST_0 ( addr
, "div", dis_buf
, Iop_DivF64
, False
);
5581 case 7: /* FDIVR single-real */
5582 fp_do_oprev_mem_ST_0 ( addr
, "divr", dis_buf
, Iop_DivF64
, False
);
5586 vex_printf("unhandled opc_aux = 0x%2x\n",
5587 (UInt
)gregLO3ofRM(modrm
));
5588 vex_printf("first_opcode == 0xD8\n");
5595 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
5596 fp_do_op_ST_ST ( "add", Iop_AddF64
, modrm
- 0xC0, 0, False
);
5599 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
5600 fp_do_op_ST_ST ( "mul", Iop_MulF64
, modrm
- 0xC8, 0, False
);
5603 /* Dunno if this is right */
5604 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
5605 r_dst
= (UInt
)modrm
- 0xD0;
5606 DIP("fcom %%st(0),%%st(%u)\n", r_dst
);
5607 /* This forces C1 to zero, which isn't right. */
5612 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
5618 /* Dunno if this is right */
5619 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
5620 r_dst
= (UInt
)modrm
- 0xD8;
5621 DIP("fcomp %%st(0),%%st(%u)\n", r_dst
);
5622 /* This forces C1 to zero, which isn't right. */
5627 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
5634 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
5635 fp_do_op_ST_ST ( "sub", Iop_SubF64
, modrm
- 0xE0, 0, False
);
5638 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
5639 fp_do_oprev_ST_ST ( "subr", Iop_SubF64
, modrm
- 0xE8, 0, False
);
5642 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
5643 fp_do_op_ST_ST ( "div", Iop_DivF64
, modrm
- 0xF0, 0, False
);
5646 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
5647 fp_do_oprev_ST_ST ( "divr", Iop_DivF64
, modrm
- 0xF8, 0, False
);
5656 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
5658 if (first_opcode
== 0xD9) {
5661 /* bits 5,4,3 are an opcode extension, and the modRM also
5662 specifies an address. */
5663 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
5666 switch (gregLO3ofRM(modrm
)) {
5668 case 0: /* FLD single-real */
5669 DIP("flds %s\n", dis_buf
);
5671 put_ST(0, unop(Iop_F32toF64
,
5672 loadLE(Ity_F32
, mkexpr(addr
))));
5675 case 2: /* FST single-real */
5676 DIP("fsts %s\n", dis_buf
);
5677 storeLE(mkexpr(addr
),
5678 binop(Iop_F64toF32
, get_roundingmode(), get_ST(0)));
5681 case 3: /* FSTP single-real */
5682 DIP("fstps %s\n", dis_buf
);
5683 storeLE(mkexpr(addr
),
5684 binop(Iop_F64toF32
, get_roundingmode(), get_ST(0)));
5688 case 4: { /* FLDENV m28 */
5689 /* Uses dirty helper:
5690 VexEmNote amd64g_do_FLDENV ( VexGuestX86State*, HWord ) */
5691 IRTemp ew
= newTemp(Ity_I32
);
5692 IRTemp w64
= newTemp(Ity_I64
);
5693 IRDirty
* d
= unsafeIRDirty_0_N (
5695 "amd64g_dirtyhelper_FLDENV",
5696 &amd64g_dirtyhelper_FLDENV
,
5697 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
5700 /* declare we're reading memory */
5702 d
->mAddr
= mkexpr(addr
);
5705 /* declare we're writing guest state */
5707 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
5709 d
->fxState
[0].fx
= Ifx_Write
;
5710 d
->fxState
[0].offset
= OFFB_FTOP
;
5711 d
->fxState
[0].size
= sizeof(UInt
);
5713 d
->fxState
[1].fx
= Ifx_Write
;
5714 d
->fxState
[1].offset
= OFFB_FPTAGS
;
5715 d
->fxState
[1].size
= 8 * sizeof(UChar
);
5717 d
->fxState
[2].fx
= Ifx_Write
;
5718 d
->fxState
[2].offset
= OFFB_FPROUND
;
5719 d
->fxState
[2].size
= sizeof(ULong
);
5721 d
->fxState
[3].fx
= Ifx_Write
;
5722 d
->fxState
[3].offset
= OFFB_FC3210
;
5723 d
->fxState
[3].size
= sizeof(ULong
);
5725 stmt( IRStmt_Dirty(d
) );
5727 /* ew contains any emulation warning we may need to
5728 issue. If needed, side-exit to the next insn,
5729 reporting the warning, so that Valgrind's dispatcher
5730 sees the warning. */
5731 assign(ew
, unop(Iop_64to32
,mkexpr(w64
)) );
5732 put_emwarn( mkexpr(ew
) );
5735 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
5737 IRConst_U64( guest_RIP_bbstart
+delta
),
5742 DIP("fldenv %s\n", dis_buf
);
5746 case 5: {/* FLDCW */
5747 /* The only thing we observe in the control word is the
5748 rounding mode. Therefore, pass the 16-bit value
5749 (x87 native-format control word) to a clean helper,
5750 getting back a 64-bit value, the lower half of which
5751 is the FPROUND value to store, and the upper half of
5752 which is the emulation-warning token which may be
5755 /* ULong amd64h_check_fldcw ( ULong ); */
5756 IRTemp t64
= newTemp(Ity_I64
);
5757 IRTemp ew
= newTemp(Ity_I32
);
5758 DIP("fldcw %s\n", dis_buf
);
5759 assign( t64
, mkIRExprCCall(
5760 Ity_I64
, 0/*regparms*/,
5761 "amd64g_check_fldcw",
5762 &amd64g_check_fldcw
,
5765 loadLE(Ity_I16
, mkexpr(addr
)))
5770 put_fpround( unop(Iop_64to32
, mkexpr(t64
)) );
5771 assign( ew
, unop(Iop_64HIto32
, mkexpr(t64
) ) );
5772 put_emwarn( mkexpr(ew
) );
5773 /* Finally, if an emulation warning was reported,
5774 side-exit to the next insn, reporting the warning,
5775 so that Valgrind's dispatcher sees the warning. */
5778 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
5780 IRConst_U64( guest_RIP_bbstart
+delta
),
5787 case 6: { /* FNSTENV m28 */
5788 /* Uses dirty helper:
5789 void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */
5790 IRDirty
* d
= unsafeIRDirty_0_N (
5792 "amd64g_dirtyhelper_FSTENV",
5793 &amd64g_dirtyhelper_FSTENV
,
5794 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
5796 /* declare we're writing memory */
5798 d
->mAddr
= mkexpr(addr
);
5801 /* declare we're reading guest state */
5803 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
5805 d
->fxState
[0].fx
= Ifx_Read
;
5806 d
->fxState
[0].offset
= OFFB_FTOP
;
5807 d
->fxState
[0].size
= sizeof(UInt
);
5809 d
->fxState
[1].fx
= Ifx_Read
;
5810 d
->fxState
[1].offset
= OFFB_FPTAGS
;
5811 d
->fxState
[1].size
= 8 * sizeof(UChar
);
5813 d
->fxState
[2].fx
= Ifx_Read
;
5814 d
->fxState
[2].offset
= OFFB_FPROUND
;
5815 d
->fxState
[2].size
= sizeof(ULong
);
5817 d
->fxState
[3].fx
= Ifx_Read
;
5818 d
->fxState
[3].offset
= OFFB_FC3210
;
5819 d
->fxState
[3].size
= sizeof(ULong
);
5821 stmt( IRStmt_Dirty(d
) );
5823 DIP("fnstenv %s\n", dis_buf
);
5827 case 7: /* FNSTCW */
5828 /* Fake up a native x87 FPU control word. The only
5829 thing it depends on is FPROUND[1:0], so call a clean
5830 helper to cook it up. */
5831 /* ULong amd64g_create_fpucw ( ULong fpround ) */
5832 DIP("fnstcw %s\n", dis_buf
);
5838 "amd64g_create_fpucw", &amd64g_create_fpucw
,
5839 mkIRExprVec_1( unop(Iop_32Uto64
, get_fpround()) )
5846 vex_printf("unhandled opc_aux = 0x%2x\n",
5847 (UInt
)gregLO3ofRM(modrm
));
5848 vex_printf("first_opcode == 0xD9\n");
5856 case 0xC0 ... 0xC7: /* FLD %st(?) */
5857 r_src
= (UInt
)modrm
- 0xC0;
5858 DIP("fld %%st(%u)\n", r_src
);
5859 t1
= newTemp(Ity_F64
);
5860 assign(t1
, get_ST(r_src
));
5862 put_ST(0, mkexpr(t1
));
5865 case 0xC8 ... 0xCF: /* FXCH %st(?) */
5866 r_src
= (UInt
)modrm
- 0xC8;
5867 DIP("fxch %%st(%u)\n", r_src
);
5868 t1
= newTemp(Ity_F64
);
5869 t2
= newTemp(Ity_F64
);
5870 assign(t1
, get_ST(0));
5871 assign(t2
, get_ST(r_src
));
5872 put_ST_UNCHECKED(0, mkexpr(t2
));
5873 put_ST_UNCHECKED(r_src
, mkexpr(t1
));
5876 case 0xE0: /* FCHS */
5878 put_ST_UNCHECKED(0, unop(Iop_NegF64
, get_ST(0)));
5881 case 0xE1: /* FABS */
5883 put_ST_UNCHECKED(0, unop(Iop_AbsF64
, get_ST(0)));
5886 case 0xE5: { /* FXAM */
5887 /* This is an interesting one. It examines %st(0),
5888 regardless of whether the tag says it's empty or not.
5889 Here, just pass both the tag (in our format) and the
5890 value (as a double, actually a ULong) to a helper
5893 = mkIRExprVec_2( unop(Iop_8Uto64
, get_ST_TAG(0)),
5894 unop(Iop_ReinterpF64asI64
,
5895 get_ST_UNCHECKED(0)) );
5896 put_C3210(mkIRExprCCall(
5899 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM
,
5906 case 0xE8: /* FLD1 */
5909 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
5910 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL
)));
5913 case 0xE9: /* FLDL2T */
5916 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
5917 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL
)));
5920 case 0xEA: /* FLDL2E */
5923 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
5924 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL
)));
5927 case 0xEB: /* FLDPI */
5930 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
5931 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL
)));
5934 case 0xEC: /* FLDLG2 */
5937 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
5938 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL
)));
5941 case 0xED: /* FLDLN2 */
5944 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
5945 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL
)));
5948 case 0xEE: /* FLDZ */
5951 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
5952 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL
)));
5955 case 0xF0: /* F2XM1 */
5959 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5963 case 0xF1: /* FYL2X */
5967 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5973 case 0xF2: { /* FPTAN */
5975 IRTemp argD
= newTemp(Ity_F64
);
5976 assign(argD
, get_ST(0));
5977 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
5978 IRTemp resD
= newTemp(Ity_F64
);
5983 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5987 put_ST_UNCHECKED(0, mkexpr(resD
));
5988 /* Conditionally push 1.0 on the stack, if the arg is
5990 maybe_fp_push(argOK
);
5991 maybe_put_ST(argOK
, 0,
5992 IRExpr_Const(IRConst_F64(1.0)));
5993 set_C2( binop(Iop_Xor64
,
5994 unop(Iop_1Uto64
, mkexpr(argOK
)),
5999 case 0xF3: /* FPATAN */
6003 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6009 case 0xF4: { /* FXTRACT */
6010 IRTemp argF
= newTemp(Ity_F64
);
6011 IRTemp sigF
= newTemp(Ity_F64
);
6012 IRTemp expF
= newTemp(Ity_F64
);
6013 IRTemp argI
= newTemp(Ity_I64
);
6014 IRTemp sigI
= newTemp(Ity_I64
);
6015 IRTemp expI
= newTemp(Ity_I64
);
6017 assign( argF
, get_ST(0) );
6018 assign( argI
, unop(Iop_ReinterpF64asI64
, mkexpr(argF
)));
6021 Ity_I64
, 0/*regparms*/,
6022 "x86amd64g_calculate_FXTRACT",
6023 &x86amd64g_calculate_FXTRACT
,
6024 mkIRExprVec_2( mkexpr(argI
),
6025 mkIRExpr_HWord(0)/*sig*/ ))
6029 Ity_I64
, 0/*regparms*/,
6030 "x86amd64g_calculate_FXTRACT",
6031 &x86amd64g_calculate_FXTRACT
,
6032 mkIRExprVec_2( mkexpr(argI
),
6033 mkIRExpr_HWord(1)/*exp*/ ))
6035 assign( sigF
, unop(Iop_ReinterpI64asF64
, mkexpr(sigI
)) );
6036 assign( expF
, unop(Iop_ReinterpI64asF64
, mkexpr(expI
)) );
6038 put_ST_UNCHECKED(0, mkexpr(expF
) );
6041 put_ST(0, mkexpr(sigF
) );
6045 case 0xF5: { /* FPREM1 -- IEEE compliant */
6046 IRTemp a1
= newTemp(Ity_F64
);
6047 IRTemp a2
= newTemp(Ity_F64
);
6049 /* Do FPREM1 twice, once to get the remainder, and once
6050 to get the C3210 flag values. */
6051 assign( a1
, get_ST(0) );
6052 assign( a2
, get_ST(1) );
6055 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6060 triop(Iop_PRem1C3210F64
,
6061 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6067 case 0xF7: /* FINCSTP */
6069 put_ftop( binop(Iop_Add32
, get_ftop(), mkU32(1)) );
6072 case 0xF8: { /* FPREM -- not IEEE compliant */
6073 IRTemp a1
= newTemp(Ity_F64
);
6074 IRTemp a2
= newTemp(Ity_F64
);
6076 /* Do FPREM twice, once to get the remainder, and once
6077 to get the C3210 flag values. */
6078 assign( a1
, get_ST(0) );
6079 assign( a2
, get_ST(1) );
6082 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6087 triop(Iop_PRemC3210F64
,
6088 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6094 case 0xF9: /* FYL2XP1 */
6097 triop(Iop_Yl2xp1F64
,
6098 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6104 case 0xFA: /* FSQRT */
6108 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6112 case 0xFB: { /* FSINCOS */
6114 IRTemp argD
= newTemp(Ity_F64
);
6115 assign(argD
, get_ST(0));
6116 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
6117 IRTemp resD
= newTemp(Ity_F64
);
6122 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6126 put_ST_UNCHECKED(0, mkexpr(resD
));
6127 /* Conditionally push the cos value on the stack, if
6128 the arg is in range */
6129 maybe_fp_push(argOK
);
6130 maybe_put_ST(argOK
, 0,
6132 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6134 set_C2( binop(Iop_Xor64
,
6135 unop(Iop_1Uto64
, mkexpr(argOK
)),
6140 case 0xFC: /* FRNDINT */
6143 binop(Iop_RoundF64toInt
, get_roundingmode(), get_ST(0)) );
6146 case 0xFD: /* FSCALE */
6150 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6155 case 0xFE: /* FSIN */
6156 case 0xFF: { /* FCOS */
6157 Bool isSIN
= modrm
== 0xFE;
6158 DIP("%s\n", isSIN
? "fsin" : "fcos");
6159 IRTemp argD
= newTemp(Ity_F64
);
6160 assign(argD
, get_ST(0));
6161 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
6162 IRTemp resD
= newTemp(Ity_F64
);
6166 binop(isSIN
? Iop_SinF64
: Iop_CosF64
,
6167 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6171 put_ST_UNCHECKED(0, mkexpr(resD
));
6172 set_C2( binop(Iop_Xor64
,
6173 unop(Iop_1Uto64
, mkexpr(argOK
)),
6184 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
6186 if (first_opcode
== 0xDA) {
6190 /* bits 5,4,3 are an opcode extension, and the modRM also
6191 specifies an address. */
6193 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
6195 switch (gregLO3ofRM(modrm
)) {
6197 case 0: /* FIADD m32int */ /* ST(0) += m32int */
6198 DIP("fiaddl %s\n", dis_buf
);
6202 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
6203 DIP("fimull %s\n", dis_buf
);
6207 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
6208 DIP("fisubl %s\n", dis_buf
);
6212 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
6213 DIP("fisubrl %s\n", dis_buf
);
6217 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
6218 DIP("fisubl %s\n", dis_buf
);
6222 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
6223 DIP("fidivrl %s\n", dis_buf
);
6230 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6233 loadLE(Ity_I32
, mkexpr(addr
)))));
6239 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6241 loadLE(Ity_I32
, mkexpr(addr
))),
6246 vex_printf("unhandled opc_aux = 0x%2x\n",
6247 (UInt
)gregLO3ofRM(modrm
));
6248 vex_printf("first_opcode == 0xDA\n");
6257 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
6258 r_src
= (UInt
)modrm
- 0xC0;
6259 DIP("fcmovb %%st(%u), %%st(0)\n", r_src
);
6262 mk_amd64g_calculate_condition(AMD64CondB
),
6263 get_ST(r_src
), get_ST(0)) );
6266 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
6267 r_src
= (UInt
)modrm
- 0xC8;
6268 DIP("fcmovz %%st(%u), %%st(0)\n", r_src
);
6271 mk_amd64g_calculate_condition(AMD64CondZ
),
6272 get_ST(r_src
), get_ST(0)) );
6275 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
6276 r_src
= (UInt
)modrm
- 0xD0;
6277 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src
);
6280 mk_amd64g_calculate_condition(AMD64CondBE
),
6281 get_ST(r_src
), get_ST(0)) );
6284 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
6285 r_src
= (UInt
)modrm
- 0xD8;
6286 DIP("fcmovu %%st(%u), %%st(0)\n", r_src
);
6289 mk_amd64g_calculate_condition(AMD64CondP
),
6290 get_ST(r_src
), get_ST(0)) );
6293 case 0xE9: /* FUCOMPP %st(0),%st(1) */
6294 DIP("fucompp %%st(0),%%st(1)\n");
6295 /* This forces C1 to zero, which isn't right. */
6300 binop(Iop_CmpF64
, get_ST(0), get_ST(1)),
6315 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
6317 if (first_opcode
== 0xDB) {
6320 /* bits 5,4,3 are an opcode extension, and the modRM also
6321 specifies an address. */
6322 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
6325 switch (gregLO3ofRM(modrm
)) {
6327 case 0: /* FILD m32int */
6328 DIP("fildl %s\n", dis_buf
);
6330 put_ST(0, unop(Iop_I32StoF64
,
6331 loadLE(Ity_I32
, mkexpr(addr
))));
6334 case 1: /* FISTTPL m32 (SSE3) */
6335 DIP("fisttpl %s\n", dis_buf
);
6336 storeLE( mkexpr(addr
),
6337 binop(Iop_F64toI32S
, mkU32(Irrm_ZERO
), get_ST(0)) );
6341 case 2: /* FIST m32 */
6342 DIP("fistl %s\n", dis_buf
);
6343 storeLE( mkexpr(addr
),
6344 binop(Iop_F64toI32S
, get_roundingmode(), get_ST(0)) );
6347 case 3: /* FISTP m32 */
6348 DIP("fistpl %s\n", dis_buf
);
6349 storeLE( mkexpr(addr
),
6350 binop(Iop_F64toI32S
, get_roundingmode(), get_ST(0)) );
6354 case 5: { /* FLD extended-real */
6355 /* Uses dirty helper:
6356 ULong amd64g_loadF80le ( ULong )
6357 addr holds the address. First, do a dirty call to
6358 get hold of the data. */
6359 IRTemp val
= newTemp(Ity_I64
);
6360 IRExpr
** args
= mkIRExprVec_1 ( mkexpr(addr
) );
6362 IRDirty
* d
= unsafeIRDirty_1_N (
6365 "amd64g_dirtyhelper_loadF80le",
6366 &amd64g_dirtyhelper_loadF80le
,
6369 /* declare that we're reading memory */
6371 d
->mAddr
= mkexpr(addr
);
6374 /* execute the dirty call, dumping the result in val. */
6375 stmt( IRStmt_Dirty(d
) );
6377 put_ST(0, unop(Iop_ReinterpI64asF64
, mkexpr(val
)));
6379 DIP("fldt %s\n", dis_buf
);
6383 case 7: { /* FSTP extended-real */
6384 /* Uses dirty helper:
6385 void amd64g_storeF80le ( ULong addr, ULong data )
6388 = mkIRExprVec_2( mkexpr(addr
),
6389 unop(Iop_ReinterpF64asI64
, get_ST(0)) );
6391 IRDirty
* d
= unsafeIRDirty_0_N (
6393 "amd64g_dirtyhelper_storeF80le",
6394 &amd64g_dirtyhelper_storeF80le
,
6397 /* declare we're writing memory */
6399 d
->mAddr
= mkexpr(addr
);
6402 /* execute the dirty call. */
6403 stmt( IRStmt_Dirty(d
) );
6406 DIP("fstpt\n %s", dis_buf
);
6411 vex_printf("unhandled opc_aux = 0x%2x\n",
6412 (UInt
)gregLO3ofRM(modrm
));
6413 vex_printf("first_opcode == 0xDB\n");
6422 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
6423 r_src
= (UInt
)modrm
- 0xC0;
6424 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src
);
6427 mk_amd64g_calculate_condition(AMD64CondNB
),
6428 get_ST(r_src
), get_ST(0)) );
6431 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
6432 r_src
= (UInt
)modrm
- 0xC8;
6433 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src
);
6437 mk_amd64g_calculate_condition(AMD64CondNZ
),
6444 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
6445 r_src
= (UInt
)modrm
- 0xD0;
6446 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src
);
6450 mk_amd64g_calculate_condition(AMD64CondNBE
),
6457 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
6458 r_src
= (UInt
)modrm
- 0xD8;
6459 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src
);
6463 mk_amd64g_calculate_condition(AMD64CondNP
),
6475 gen_FINIT_SEQUENCE(NULL
/*no guarding condition*/);
6480 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
6481 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xE8, False
);
6484 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
6485 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xF0, False
);
6494 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
6496 if (first_opcode
== 0xDC) {
6499 /* bits 5,4,3 are an opcode extension, and the modRM also
6500 specifies an address. */
6501 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
6504 switch (gregLO3ofRM(modrm
)) {
6506 case 0: /* FADD double-real */
6507 fp_do_op_mem_ST_0 ( addr
, "add", dis_buf
, Iop_AddF64
, True
);
6510 case 1: /* FMUL double-real */
6511 fp_do_op_mem_ST_0 ( addr
, "mul", dis_buf
, Iop_MulF64
, True
);
6514 case 2: /* FCOM double-real */
6515 DIP("fcoml %s\n", dis_buf
);
6516 /* This forces C1 to zero, which isn't right. */
6523 loadLE(Ity_F64
,mkexpr(addr
))),
6529 case 3: /* FCOMP double-real */
6530 DIP("fcompl %s\n", dis_buf
);
6531 /* This forces C1 to zero, which isn't right. */
6538 loadLE(Ity_F64
,mkexpr(addr
))),
6545 case 4: /* FSUB double-real */
6546 fp_do_op_mem_ST_0 ( addr
, "sub", dis_buf
, Iop_SubF64
, True
);
6549 case 5: /* FSUBR double-real */
6550 fp_do_oprev_mem_ST_0 ( addr
, "subr", dis_buf
, Iop_SubF64
, True
);
6553 case 6: /* FDIV double-real */
6554 fp_do_op_mem_ST_0 ( addr
, "div", dis_buf
, Iop_DivF64
, True
);
6557 case 7: /* FDIVR double-real */
6558 fp_do_oprev_mem_ST_0 ( addr
, "divr", dis_buf
, Iop_DivF64
, True
);
6562 vex_printf("unhandled opc_aux = 0x%2x\n",
6563 (UInt
)gregLO3ofRM(modrm
));
6564 vex_printf("first_opcode == 0xDC\n");
6573 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
6574 fp_do_op_ST_ST ( "add", Iop_AddF64
, 0, modrm
- 0xC0, False
);
6577 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
6578 fp_do_op_ST_ST ( "mul", Iop_MulF64
, 0, modrm
- 0xC8, False
);
6581 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
6582 fp_do_oprev_ST_ST ( "subr", Iop_SubF64
, 0, modrm
- 0xE0, False
);
6585 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
6586 fp_do_op_ST_ST ( "sub", Iop_SubF64
, 0, modrm
- 0xE8, False
);
6589 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
6590 fp_do_oprev_ST_ST ( "divr", Iop_DivF64
, 0, modrm
- 0xF0, False
);
6593 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
6594 fp_do_op_ST_ST ( "div", Iop_DivF64
, 0, modrm
- 0xF8, False
);
6604 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
6606 if (first_opcode
== 0xDD) {
6610 /* bits 5,4,3 are an opcode extension, and the modRM also
6611 specifies an address. */
6612 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
6615 switch (gregLO3ofRM(modrm
)) {
6617 case 0: /* FLD double-real */
6618 DIP("fldl %s\n", dis_buf
);
6620 put_ST(0, loadLE(Ity_F64
, mkexpr(addr
)));
6623 case 1: /* FISTTPQ m64 (SSE3) */
6624 DIP("fistppll %s\n", dis_buf
);
6625 storeLE( mkexpr(addr
),
6626 binop(Iop_F64toI64S
, mkU32(Irrm_ZERO
), get_ST(0)) );
6630 case 2: /* FST double-real */
6631 DIP("fstl %s\n", dis_buf
);
6632 storeLE(mkexpr(addr
), get_ST(0));
6635 case 3: /* FSTP double-real */
6636 DIP("fstpl %s\n", dis_buf
);
6637 storeLE(mkexpr(addr
), get_ST(0));
6641 case 4: { /* FRSTOR m94/m108 */
6642 IRTemp ew
= newTemp(Ity_I32
);
6643 IRTemp w64
= newTemp(Ity_I64
);
6645 if ( have66(pfx
) ) {
6646 /* Uses dirty helper:
6647 VexEmNote amd64g_dirtyhelper_FRSTORS
6648 ( VexGuestAMD64State*, HWord ) */
6649 d
= unsafeIRDirty_0_N (
6651 "amd64g_dirtyhelper_FRSTORS",
6652 &amd64g_dirtyhelper_FRSTORS
,
6653 mkIRExprVec_1( mkexpr(addr
) )
6657 /* Uses dirty helper:
6658 VexEmNote amd64g_dirtyhelper_FRSTOR
6659 ( VexGuestAMD64State*, HWord ) */
6660 d
= unsafeIRDirty_0_N (
6662 "amd64g_dirtyhelper_FRSTOR",
6663 &amd64g_dirtyhelper_FRSTOR
,
6664 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
6670 /* declare we're reading memory */
6672 d
->mAddr
= mkexpr(addr
);
6673 /* d->mSize set above */
6675 /* declare we're writing guest state */
6677 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
6679 d
->fxState
[0].fx
= Ifx_Write
;
6680 d
->fxState
[0].offset
= OFFB_FTOP
;
6681 d
->fxState
[0].size
= sizeof(UInt
);
6683 d
->fxState
[1].fx
= Ifx_Write
;
6684 d
->fxState
[1].offset
= OFFB_FPREGS
;
6685 d
->fxState
[1].size
= 8 * sizeof(ULong
);
6687 d
->fxState
[2].fx
= Ifx_Write
;
6688 d
->fxState
[2].offset
= OFFB_FPTAGS
;
6689 d
->fxState
[2].size
= 8 * sizeof(UChar
);
6691 d
->fxState
[3].fx
= Ifx_Write
;
6692 d
->fxState
[3].offset
= OFFB_FPROUND
;
6693 d
->fxState
[3].size
= sizeof(ULong
);
6695 d
->fxState
[4].fx
= Ifx_Write
;
6696 d
->fxState
[4].offset
= OFFB_FC3210
;
6697 d
->fxState
[4].size
= sizeof(ULong
);
6699 stmt( IRStmt_Dirty(d
) );
6701 /* ew contains any emulation warning we may need to
6702 issue. If needed, side-exit to the next insn,
6703 reporting the warning, so that Valgrind's dispatcher
6704 sees the warning. */
6705 assign(ew
, unop(Iop_64to32
,mkexpr(w64
)) );
6706 put_emwarn( mkexpr(ew
) );
6709 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
6711 IRConst_U64( guest_RIP_bbstart
+delta
),
6716 if ( have66(pfx
) ) {
6717 DIP("frstors %s\n", dis_buf
);
6719 DIP("frstor %s\n", dis_buf
);
6724 case 6: { /* FNSAVE m94/m108 */
6726 if ( have66(pfx
) ) {
6727 /* Uses dirty helper:
6728 void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*,
6730 d
= unsafeIRDirty_0_N (
6732 "amd64g_dirtyhelper_FNSAVES",
6733 &amd64g_dirtyhelper_FNSAVES
,
6734 mkIRExprVec_1( mkexpr(addr
) )
6738 /* Uses dirty helper:
6739 void amd64g_dirtyhelper_FNSAVE ( VexGuestAMD64State*,
6741 d
= unsafeIRDirty_0_N (
6743 "amd64g_dirtyhelper_FNSAVE",
6744 &amd64g_dirtyhelper_FNSAVE
,
6745 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
6750 /* declare we're writing memory */
6752 d
->mAddr
= mkexpr(addr
);
6753 /* d->mSize set above */
6755 /* declare we're reading guest state */
6757 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
6759 d
->fxState
[0].fx
= Ifx_Read
;
6760 d
->fxState
[0].offset
= OFFB_FTOP
;
6761 d
->fxState
[0].size
= sizeof(UInt
);
6763 d
->fxState
[1].fx
= Ifx_Read
;
6764 d
->fxState
[1].offset
= OFFB_FPREGS
;
6765 d
->fxState
[1].size
= 8 * sizeof(ULong
);
6767 d
->fxState
[2].fx
= Ifx_Read
;
6768 d
->fxState
[2].offset
= OFFB_FPTAGS
;
6769 d
->fxState
[2].size
= 8 * sizeof(UChar
);
6771 d
->fxState
[3].fx
= Ifx_Read
;
6772 d
->fxState
[3].offset
= OFFB_FPROUND
;
6773 d
->fxState
[3].size
= sizeof(ULong
);
6775 d
->fxState
[4].fx
= Ifx_Read
;
6776 d
->fxState
[4].offset
= OFFB_FC3210
;
6777 d
->fxState
[4].size
= sizeof(ULong
);
6779 stmt( IRStmt_Dirty(d
) );
6781 if ( have66(pfx
) ) {
6782 DIP("fnsaves %s\n", dis_buf
);
6784 DIP("fnsave %s\n", dis_buf
);
6789 case 7: { /* FNSTSW m16 */
6790 IRExpr
* sw
= get_FPU_sw();
6791 vassert(typeOfIRExpr(irsb
->tyenv
, sw
) == Ity_I16
);
6792 storeLE( mkexpr(addr
), sw
);
6793 DIP("fnstsw %s\n", dis_buf
);
6798 vex_printf("unhandled opc_aux = 0x%2x\n",
6799 (UInt
)gregLO3ofRM(modrm
));
6800 vex_printf("first_opcode == 0xDD\n");
6807 case 0xC0 ... 0xC7: /* FFREE %st(?) */
6808 r_dst
= (UInt
)modrm
- 0xC0;
6809 DIP("ffree %%st(%u)\n", r_dst
);
6810 put_ST_TAG ( r_dst
, mkU8(0) );
6813 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
6814 r_dst
= (UInt
)modrm
- 0xD0;
6815 DIP("fst %%st(0),%%st(%u)\n", r_dst
);
6816 /* P4 manual says: "If the destination operand is a
6817 non-empty register, the invalid-operation exception
6818 is not generated. Hence put_ST_UNCHECKED. */
6819 put_ST_UNCHECKED(r_dst
, get_ST(0));
6822 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
6823 r_dst
= (UInt
)modrm
- 0xD8;
6824 DIP("fstp %%st(0),%%st(%u)\n", r_dst
);
6825 /* P4 manual says: "If the destination operand is a
6826 non-empty register, the invalid-operation exception
6827 is not generated. Hence put_ST_UNCHECKED. */
6828 put_ST_UNCHECKED(r_dst
, get_ST(0));
6832 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
6833 r_dst
= (UInt
)modrm
- 0xE0;
6834 DIP("fucom %%st(0),%%st(%u)\n", r_dst
);
6835 /* This forces C1 to zero, which isn't right. */
6840 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
6846 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
6847 r_dst
= (UInt
)modrm
- 0xE8;
6848 DIP("fucomp %%st(0),%%st(%u)\n", r_dst
);
6849 /* This forces C1 to zero, which isn't right. */
6854 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
6867 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
6869 if (first_opcode
== 0xDE) {
6873 /* bits 5,4,3 are an opcode extension, and the modRM also
6874 specifies an address. */
6876 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
6879 switch (gregLO3ofRM(modrm
)) {
6881 case 0: /* FIADD m16int */ /* ST(0) += m16int */
6882 DIP("fiaddw %s\n", dis_buf
);
6886 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
6887 DIP("fimulw %s\n", dis_buf
);
6891 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
6892 DIP("fisubw %s\n", dis_buf
);
6896 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
6897 DIP("fisubrw %s\n", dis_buf
);
            case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
               DIP("fidivw %s\n", dis_buf);
6906 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
6907 DIP("fidivrw %s\n", dis_buf
);
6914 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6918 loadLE(Ity_I16
, mkexpr(addr
))))));
6924 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6927 loadLE(Ity_I16
, mkexpr(addr
)))),
6932 vex_printf("unhandled opc_aux = 0x%2x\n",
6933 (UInt
)gregLO3ofRM(modrm
));
6934 vex_printf("first_opcode == 0xDE\n");
6943 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
6944 fp_do_op_ST_ST ( "add", Iop_AddF64
, 0, modrm
- 0xC0, True
);
6947 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
6948 fp_do_op_ST_ST ( "mul", Iop_MulF64
, 0, modrm
- 0xC8, True
);
6951 case 0xD9: /* FCOMPP %st(0),%st(1) */
6952 DIP("fcompp %%st(0),%%st(1)\n");
6953 /* This forces C1 to zero, which isn't right. */
6958 binop(Iop_CmpF64
, get_ST(0), get_ST(1)),
6966 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
6967 fp_do_oprev_ST_ST ( "subr", Iop_SubF64
, 0, modrm
- 0xE0, True
);
6970 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
6971 fp_do_op_ST_ST ( "sub", Iop_SubF64
, 0, modrm
- 0xE8, True
);
6974 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
6975 fp_do_oprev_ST_ST ( "divr", Iop_DivF64
, 0, modrm
- 0xF0, True
);
6978 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
6979 fp_do_op_ST_ST ( "div", Iop_DivF64
, 0, modrm
- 0xF8, True
);
6989 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
6991 if (first_opcode
== 0xDF) {
6995 /* bits 5,4,3 are an opcode extension, and the modRM also
6996 specifies an address. */
6997 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7000 switch (gregLO3ofRM(modrm
)) {
7002 case 0: /* FILD m16int */
7003 DIP("fildw %s\n", dis_buf
);
7005 put_ST(0, unop(Iop_I32StoF64
,
7007 loadLE(Ity_I16
, mkexpr(addr
)))));
7010 case 1: /* FISTTPS m16 (SSE3) */
7011 DIP("fisttps %s\n", dis_buf
);
7012 storeLE( mkexpr(addr
),
7013 x87ishly_qnarrow_32_to_16(
7014 binop(Iop_F64toI32S
, mkU32(Irrm_ZERO
), get_ST(0)) ));
7018 case 2: /* FIST m16 */
7019 DIP("fists %s\n", dis_buf
);
7020 storeLE( mkexpr(addr
),
7021 x87ishly_qnarrow_32_to_16(
7022 binop(Iop_F64toI32S
, get_roundingmode(), get_ST(0)) ));
7025 case 3: /* FISTP m16 */
7026 DIP("fistps %s\n", dis_buf
);
7027 storeLE( mkexpr(addr
),
7028 x87ishly_qnarrow_32_to_16(
7029 binop(Iop_F64toI32S
, get_roundingmode(), get_ST(0)) ));
7033 case 5: /* FILD m64 */
7034 DIP("fildll %s\n", dis_buf
);
7036 put_ST(0, binop(Iop_I64StoF64
,
7038 loadLE(Ity_I64
, mkexpr(addr
))));
7041 case 7: /* FISTP m64 */
7042 DIP("fistpll %s\n", dis_buf
);
7043 storeLE( mkexpr(addr
),
7044 binop(Iop_F64toI64S
, get_roundingmode(), get_ST(0)) );
7049 vex_printf("unhandled opc_aux = 0x%2x\n",
7050 (UInt
)gregLO3ofRM(modrm
));
7051 vex_printf("first_opcode == 0xDF\n");
7060 case 0xC0: /* FFREEP %st(0) */
7061 DIP("ffreep %%st(%d)\n", 0);
7062 put_ST_TAG ( 0, mkU8(0) );
7066 case 0xE0: /* FNSTSW %ax */
7067 DIP("fnstsw %%ax\n");
7068 /* Invent a plausible-looking FPU status word value and
7070 ((ftop & 7) << 11) | (c3210 & 0x4700)
7077 binop(Iop_And32
, get_ftop(), mkU32(7)),
7080 unop(Iop_64to32
, get_C3210()),
7085 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
7086 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xE8, True
);
7089 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
7090 /* not really right since COMIP != UCOMIP */
7091 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xF0, True
);
/*------------------------------------------------------------*/
/*--- MMX INSTRUCTIONS                                     ---*/
/*------------------------------------------------------------*/

/* Effect of MMX insns on x87 FPU state (table 11-2 of 
   IA32 arch manual, volume 3):

   Read from, or write to MMX register (viz, any insn except EMMS):
   * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
   * FP stack pointer set to zero

   EMMS:
   * All tags set to Invalid (empty) -- FPTAGS[i] := zero
   * FP stack pointer set to zero
*/

static void do_MMX_preamble ( void )
{
   Int         i;
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   IRExpr*     zero  = mkU32(0);
   IRExpr*     tag1  = mkU8(1);
   put_ftop(zero);
   for (i = 0; i < 8; i++)
      stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
}

static void do_EMMS_preamble ( void )
{
   Int         i;
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   IRExpr*     zero  = mkU32(0);
   IRExpr*     tag0  = mkU8(0);
   put_ftop(zero);
   for (i = 0; i < 8; i++)
      stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
}


static IRExpr* getMMXReg ( UInt archreg )
{
   vassert(archreg < 8);
   return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
}


static void putMMXReg ( UInt archreg, IRExpr* e )
{
   vassert(archreg < 8);
   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
   stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
}
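/* Note (illustrative, not in the original source): each guest x87 register
   is stored as 8 bytes at guest-state offset OFFB_FPREGS + 8*i, and the two
   helpers above simply view that slot as an Ity_I64.  So getMMXReg(3) reads
   the same bytes that back physical FP register 3, which is how the
   architectural MMX/x87 aliasing described in the table above falls out for
   free. */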
7169 /* Helper for non-shift MMX insns. Note this is incomplete in the
7170 sense that it does not first call do_MMX_preamble() -- that is the
7171 responsibility of its caller. */
7174 ULong
dis_MMXop_regmem_to_reg ( const VexAbiInfo
* vbi
,
7179 Bool show_granularity
)
7182 UChar modrm
= getUChar(delta
);
7183 Bool isReg
= epartIsReg(modrm
);
7184 IRExpr
* argL
= NULL
;
7185 IRExpr
* argR
= NULL
;
7186 IRExpr
* argG
= NULL
;
7187 IRExpr
* argE
= NULL
;
7188 IRTemp res
= newTemp(Ity_I64
);
7191 IROp op
= Iop_INVALID
;
7193 const HChar
* hName
= NULL
;
7196 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
7199 /* Original MMX ones */
7200 case 0xFC: op
= Iop_Add8x8
; break;
7201 case 0xFD: op
= Iop_Add16x4
; break;
7202 case 0xFE: op
= Iop_Add32x2
; break;
7204 case 0xEC: op
= Iop_QAdd8Sx8
; break;
7205 case 0xED: op
= Iop_QAdd16Sx4
; break;
7207 case 0xDC: op
= Iop_QAdd8Ux8
; break;
7208 case 0xDD: op
= Iop_QAdd16Ux4
; break;
7210 case 0xF8: op
= Iop_Sub8x8
; break;
7211 case 0xF9: op
= Iop_Sub16x4
; break;
7212 case 0xFA: op
= Iop_Sub32x2
; break;
7214 case 0xE8: op
= Iop_QSub8Sx8
; break;
7215 case 0xE9: op
= Iop_QSub16Sx4
; break;
7217 case 0xD8: op
= Iop_QSub8Ux8
; break;
7218 case 0xD9: op
= Iop_QSub16Ux4
; break;
7220 case 0xE5: op
= Iop_MulHi16Sx4
; break;
7221 case 0xD5: op
= Iop_Mul16x4
; break;
7222 case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd
); break;
7224 case 0x74: op
= Iop_CmpEQ8x8
; break;
7225 case 0x75: op
= Iop_CmpEQ16x4
; break;
7226 case 0x76: op
= Iop_CmpEQ32x2
; break;
7228 case 0x64: op
= Iop_CmpGT8Sx8
; break;
7229 case 0x65: op
= Iop_CmpGT16Sx4
; break;
7230 case 0x66: op
= Iop_CmpGT32Sx2
; break;
7232 case 0x6B: op
= Iop_QNarrowBin32Sto16Sx4
; eLeft
= True
; break;
7233 case 0x63: op
= Iop_QNarrowBin16Sto8Sx8
; eLeft
= True
; break;
7234 case 0x67: op
= Iop_QNarrowBin16Sto8Ux8
; eLeft
= True
; break;
7236 case 0x68: op
= Iop_InterleaveHI8x8
; eLeft
= True
; break;
7237 case 0x69: op
= Iop_InterleaveHI16x4
; eLeft
= True
; break;
7238 case 0x6A: op
= Iop_InterleaveHI32x2
; eLeft
= True
; break;
7240 case 0x60: op
= Iop_InterleaveLO8x8
; eLeft
= True
; break;
7241 case 0x61: op
= Iop_InterleaveLO16x4
; eLeft
= True
; break;
7242 case 0x62: op
= Iop_InterleaveLO32x2
; eLeft
= True
; break;
7244 case 0xDB: op
= Iop_And64
; break;
7245 case 0xDF: op
= Iop_And64
; invG
= True
; break;
7246 case 0xEB: op
= Iop_Or64
; break;
7247 case 0xEF: /* Possibly do better here if argL and argR are the
7249 op
= Iop_Xor64
; break;
7251 /* Introduced in SSE1 */
7252 case 0xE0: op
= Iop_Avg8Ux8
; break;
7253 case 0xE3: op
= Iop_Avg16Ux4
; break;
7254 case 0xEE: op
= Iop_Max16Sx4
; break;
7255 case 0xDE: op
= Iop_Max8Ux8
; break;
7256 case 0xEA: op
= Iop_Min16Sx4
; break;
7257 case 0xDA: op
= Iop_Min8Ux8
; break;
7258 case 0xE4: op
= Iop_MulHi16Ux4
; break;
7259 case 0xF6: XXX(amd64g_calculate_mmx_psadbw
); break;
7261 /* Introduced in SSE2 */
7262 case 0xD4: op
= Iop_Add64
; break;
7263 case 0xFB: op
= Iop_Sub64
; break;
7266 vex_printf("\n0x%x\n", (UInt
)opc
);
7267 vpanic("dis_MMXop_regmem_to_reg");
7272 argG
= getMMXReg(gregLO3ofRM(modrm
));
7274 argG
= unop(Iop_Not64
, argG
);
7278 argE
= getMMXReg(eregLO3ofRM(modrm
));
7281 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7283 argE
= loadLE(Ity_I64
, mkexpr(addr
));
7294 if (op
!= Iop_INVALID
) {
7295 vassert(hName
== NULL
);
7296 vassert(hAddr
== NULL
);
7297 assign(res
, binop(op
, argL
, argR
));
7299 vassert(hName
!= NULL
);
7300 vassert(hAddr
!= NULL
);
7304 0/*regparms*/, hName
, hAddr
,
7305 mkIRExprVec_2( argL
, argR
)
7310 putMMXReg( gregLO3ofRM(modrm
), mkexpr(res
) );
7312 DIP("%s%s %s, %s\n",
7313 name
, show_granularity
? nameMMXGran(opc
& 3) : "",
7314 ( isReg
? nameMMXReg(eregLO3ofRM(modrm
)) : dis_buf
),
7315 nameMMXReg(gregLO3ofRM(modrm
)) );
7321 /* Vector by scalar shift of G by the amount specified at the bottom
7322 of E. This is a straight copy of dis_SSE_shiftG_byE. */
7324 static ULong
dis_MMX_shiftG_byE ( const VexAbiInfo
* vbi
,
7325 Prefix pfx
, Long delta
,
7326 const HChar
* opname
, IROp op
)
7332 UChar rm
= getUChar(delta
);
7333 IRTemp g0
= newTemp(Ity_I64
);
7334 IRTemp g1
= newTemp(Ity_I64
);
7335 IRTemp amt
= newTemp(Ity_I64
);
7336 IRTemp amt8
= newTemp(Ity_I8
);
7338 if (epartIsReg(rm
)) {
7339 assign( amt
, getMMXReg(eregLO3ofRM(rm
)) );
7340 DIP("%s %s,%s\n", opname
,
7341 nameMMXReg(eregLO3ofRM(rm
)),
7342 nameMMXReg(gregLO3ofRM(rm
)) );
7345 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
7346 assign( amt
, loadLE(Ity_I64
, mkexpr(addr
)) );
7347 DIP("%s %s,%s\n", opname
,
7349 nameMMXReg(gregLO3ofRM(rm
)) );
7352 assign( g0
, getMMXReg(gregLO3ofRM(rm
)) );
7353 assign( amt8
, unop(Iop_64to8
, mkexpr(amt
)) );
7355 shl
= shr
= sar
= False
;
7358 case Iop_ShlN16x4
: shl
= True
; size
= 32; break;
7359 case Iop_ShlN32x2
: shl
= True
; size
= 32; break;
7360 case Iop_Shl64
: shl
= True
; size
= 64; break;
7361 case Iop_ShrN16x4
: shr
= True
; size
= 16; break;
7362 case Iop_ShrN32x2
: shr
= True
; size
= 32; break;
7363 case Iop_Shr64
: shr
= True
; size
= 64; break;
7364 case Iop_SarN16x4
: sar
= True
; size
= 16; break;
7365 case Iop_SarN32x2
: sar
= True
; size
= 32; break;
7366 default: vassert(0);
7373 binop(Iop_CmpLT64U
,mkexpr(amt
),mkU64(size
)),
7374 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
7383 binop(Iop_CmpLT64U
,mkexpr(amt
),mkU64(size
)),
7384 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
7385 binop(op
, mkexpr(g0
), mkU8(size
-1))
7392 putMMXReg( gregLO3ofRM(rm
), mkexpr(g1
) );
7397 /* Vector by scalar shift of E by an immediate byte. This is a
7398 straight copy of dis_SSE_shiftE_imm. */
7401 ULong
dis_MMX_shiftE_imm ( Long delta
, const HChar
* opname
, IROp op
)
7404 UChar rm
= getUChar(delta
);
7405 IRTemp e0
= newTemp(Ity_I64
);
7406 IRTemp e1
= newTemp(Ity_I64
);
7408 vassert(epartIsReg(rm
));
7409 vassert(gregLO3ofRM(rm
) == 2
7410 || gregLO3ofRM(rm
) == 4 || gregLO3ofRM(rm
) == 6);
7411 amt
= getUChar(delta
+1);
7413 DIP("%s $%d,%s\n", opname
,
7415 nameMMXReg(eregLO3ofRM(rm
)) );
7417 assign( e0
, getMMXReg(eregLO3ofRM(rm
)) );
7419 shl
= shr
= sar
= False
;
7422 case Iop_ShlN16x4
: shl
= True
; size
= 16; break;
7423 case Iop_ShlN32x2
: shl
= True
; size
= 32; break;
7424 case Iop_Shl64
: shl
= True
; size
= 64; break;
7425 case Iop_SarN16x4
: sar
= True
; size
= 16; break;
7426 case Iop_SarN32x2
: sar
= True
; size
= 32; break;
7427 case Iop_ShrN16x4
: shr
= True
; size
= 16; break;
7428 case Iop_ShrN32x2
: shr
= True
; size
= 32; break;
7429 case Iop_Shr64
: shr
= True
; size
= 64; break;
7430 default: vassert(0);
7434 assign( e1
, amt
>= size
7436 : binop(op
, mkexpr(e0
), mkU8(amt
))
7440 assign( e1
, amt
>= size
7441 ? binop(op
, mkexpr(e0
), mkU8(size
-1))
7442 : binop(op
, mkexpr(e0
), mkU8(amt
))
7448 putMMXReg( eregLO3ofRM(rm
), mkexpr(e1
) );
7453 /* Completely handle all MMX instructions except emms. */
7456 ULong
dis_MMX ( Bool
* decode_ok
,
7457 const VexAbiInfo
* vbi
, Prefix pfx
, Int sz
, Long delta
)
7462 UChar opc
= getUChar(delta
);
7465 /* dis_MMX handles all insns except emms. */
7472 /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
7473 modrm
= getUChar(delta
);
7474 if (epartIsReg(modrm
)) {
7478 binop( Iop_32HLto64
,
7480 getIReg32(eregOfRexRM(pfx
,modrm
)) ) );
7481 DIP("movd %s, %s\n",
7482 nameIReg32(eregOfRexRM(pfx
,modrm
)),
7483 nameMMXReg(gregLO3ofRM(modrm
)));
7485 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7489 binop( Iop_32HLto64
,
7491 loadLE(Ity_I32
, mkexpr(addr
)) ) );
7492 DIP("movd %s, %s\n", dis_buf
, nameMMXReg(gregLO3ofRM(modrm
)));
7497 /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
7498 modrm
= getUChar(delta
);
7499 if (epartIsReg(modrm
)) {
7501 putMMXReg( gregLO3ofRM(modrm
),
7502 getIReg64(eregOfRexRM(pfx
,modrm
)) );
7503 DIP("movd %s, %s\n",
7504 nameIReg64(eregOfRexRM(pfx
,modrm
)),
7505 nameMMXReg(gregLO3ofRM(modrm
)));
7507 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7509 putMMXReg( gregLO3ofRM(modrm
),
7510 loadLE(Ity_I64
, mkexpr(addr
)) );
7511 DIP("movd{64} %s, %s\n", dis_buf
, nameMMXReg(gregLO3ofRM(modrm
)));
7515 goto mmx_decode_failure
;
7521 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */
7522 modrm
= getUChar(delta
);
7523 if (epartIsReg(modrm
)) {
7525 putIReg32( eregOfRexRM(pfx
,modrm
),
7526 unop(Iop_64to32
, getMMXReg(gregLO3ofRM(modrm
)) ) );
7527 DIP("movd %s, %s\n",
7528 nameMMXReg(gregLO3ofRM(modrm
)),
7529 nameIReg32(eregOfRexRM(pfx
,modrm
)));
7531 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7533 storeLE( mkexpr(addr
),
7534 unop(Iop_64to32
, getMMXReg(gregLO3ofRM(modrm
)) ) );
7535 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm
)), dis_buf
);
7540 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */
7541 modrm
= getUChar(delta
);
7542 if (epartIsReg(modrm
)) {
7544 putIReg64( eregOfRexRM(pfx
,modrm
),
7545 getMMXReg(gregLO3ofRM(modrm
)) );
7546 DIP("movd %s, %s\n",
7547 nameMMXReg(gregLO3ofRM(modrm
)),
7548 nameIReg64(eregOfRexRM(pfx
,modrm
)));
7550 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7552 storeLE( mkexpr(addr
),
7553 getMMXReg(gregLO3ofRM(modrm
)) );
7554 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm
)), dis_buf
);
7557 goto mmx_decode_failure
;
7562 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
7564 && /*ignore redundant REX.W*/!(sz
==8 && haveNo66noF2noF3(pfx
)))
7565 goto mmx_decode_failure
;
7566 modrm
= getUChar(delta
);
7567 if (epartIsReg(modrm
)) {
7569 putMMXReg( gregLO3ofRM(modrm
), getMMXReg(eregLO3ofRM(modrm
)) );
7570 DIP("movq %s, %s\n",
7571 nameMMXReg(eregLO3ofRM(modrm
)),
7572 nameMMXReg(gregLO3ofRM(modrm
)));
7574 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7576 putMMXReg( gregLO3ofRM(modrm
), loadLE(Ity_I64
, mkexpr(addr
)) );
7577 DIP("movq %s, %s\n",
7578 dis_buf
, nameMMXReg(gregLO3ofRM(modrm
)));
7583 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
7585 && /*ignore redundant REX.W*/!(sz
==8 && haveNo66noF2noF3(pfx
)))
7586 goto mmx_decode_failure
;
7587 modrm
= getUChar(delta
);
7588 if (epartIsReg(modrm
)) {
7590 putMMXReg( eregLO3ofRM(modrm
), getMMXReg(gregLO3ofRM(modrm
)) );
7591 DIP("movq %s, %s\n",
7592 nameMMXReg(gregLO3ofRM(modrm
)),
7593 nameMMXReg(eregLO3ofRM(modrm
)));
7595 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7597 storeLE( mkexpr(addr
), getMMXReg(gregLO3ofRM(modrm
)) );
7598 DIP("mov(nt)q %s, %s\n",
7599 nameMMXReg(gregLO3ofRM(modrm
)), dis_buf
);
7605 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
7607 goto mmx_decode_failure
;
7608 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "padd", True
);
7612 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
7614 && /*ignore redundant REX.W*/!(sz
==8 && haveNo66noF2noF3(pfx
)))
7615 goto mmx_decode_failure
;
7616 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "padds", True
);
7620 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7622 goto mmx_decode_failure
;
7623 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "paddus", True
);
7628 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
7630 goto mmx_decode_failure
;
7631 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "psub", True
);
7635 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
7637 goto mmx_decode_failure
;
7638 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "psubs", True
);
7642 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7644 goto mmx_decode_failure
;
7645 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "psubus", True
);
7648 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
7650 goto mmx_decode_failure
;
7651 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pmulhw", False
);
7654 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
7656 goto mmx_decode_failure
;
7657 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pmullw", False
);
7660 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
7662 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pmaddwd", False
);
7667 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
7669 goto mmx_decode_failure
;
7670 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pcmpeq", True
);
7675 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
7677 goto mmx_decode_failure
;
7678 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pcmpgt", True
);
7681 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
7683 goto mmx_decode_failure
;
7684 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "packssdw", False
);
7687 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
7689 goto mmx_decode_failure
;
7690 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "packsswb", False
);
7693 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
7695 goto mmx_decode_failure
;
7696 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "packuswb", False
);
7701 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
7703 && /*ignore redundant REX.W*/!(sz
==8 && haveNo66noF2noF3(pfx
)))
7704 goto mmx_decode_failure
;
7705 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "punpckh", True
);
7710 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
7712 && /*ignore redundant REX.W*/!(sz
==8 && haveNo66noF2noF3(pfx
)))
7713 goto mmx_decode_failure
;
7714 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "punpckl", True
);
7717 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
7719 goto mmx_decode_failure
;
7720 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pand", False
);
7723 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
7725 goto mmx_decode_failure
;
7726 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pandn", False
);
7729 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
7731 goto mmx_decode_failure
;
7732 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "por", False
);
7735 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
7737 goto mmx_decode_failure
;
7738 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pxor", False
);
7741 # define SHIFT_BY_REG(_name,_op) \
7742 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \
7745 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
7746 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4
);
7747 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2
);
7748 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64
);
7750 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
7751 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4
);
7752 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2
);
7753 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64
);
7755 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
7756 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4
);
7757 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2
);
7759 # undef SHIFT_BY_REG
7764 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
7765 UChar byte2
, subopc
;
7767 goto mmx_decode_failure
;
7768 byte2
= getUChar(delta
); /* amode / sub-opcode */
7769 subopc
= toUChar( (byte2
>> 3) & 7 );
7771 # define SHIFT_BY_IMM(_name,_op) \
7772 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
7775 if (subopc
== 2 /*SRL*/ && opc
== 0x71)
7776 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4
);
7777 else if (subopc
== 2 /*SRL*/ && opc
== 0x72)
7778 SHIFT_BY_IMM("psrld", Iop_ShrN32x2
);
7779 else if (subopc
== 2 /*SRL*/ && opc
== 0x73)
7780 SHIFT_BY_IMM("psrlq", Iop_Shr64
);
7782 else if (subopc
== 4 /*SAR*/ && opc
== 0x71)
7783 SHIFT_BY_IMM("psraw", Iop_SarN16x4
);
7784 else if (subopc
== 4 /*SAR*/ && opc
== 0x72)
7785 SHIFT_BY_IMM("psrad", Iop_SarN32x2
);
7787 else if (subopc
== 6 /*SHL*/ && opc
== 0x71)
7788 SHIFT_BY_IMM("psllw", Iop_ShlN16x4
);
7789 else if (subopc
== 6 /*SHL*/ && opc
== 0x72)
7790 SHIFT_BY_IMM("pslld", Iop_ShlN32x2
);
7791 else if (subopc
== 6 /*SHL*/ && opc
== 0x73)
7792 SHIFT_BY_IMM("psllq", Iop_Shl64
);
7794 else goto mmx_decode_failure
;
7796 # undef SHIFT_BY_IMM
7801 IRTemp addr
= newTemp(Ity_I64
);
7802 IRTemp regD
= newTemp(Ity_I64
);
7803 IRTemp regM
= newTemp(Ity_I64
);
7804 IRTemp mask
= newTemp(Ity_I64
);
7805 IRTemp olddata
= newTemp(Ity_I64
);
7806 IRTemp newdata
= newTemp(Ity_I64
);
7808 modrm
= getUChar(delta
);
7809 if (sz
!= 4 || (!epartIsReg(modrm
)))
7810 goto mmx_decode_failure
;
7813 assign( addr
, handleAddrOverrides( vbi
, pfx
, getIReg64(R_RDI
) ));
7814 assign( regM
, getMMXReg( eregLO3ofRM(modrm
) ));
7815 assign( regD
, getMMXReg( gregLO3ofRM(modrm
) ));
7816 assign( mask
, binop(Iop_SarN8x8
, mkexpr(regM
), mkU8(7)) );
7817 assign( olddata
, loadLE( Ity_I64
, mkexpr(addr
) ));
7825 unop(Iop_Not64
, mkexpr(mask
)))) );
7826 storeLE( mkexpr(addr
), mkexpr(newdata
) );
7827 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm
) ),
7828 nameMMXReg( gregLO3ofRM(modrm
) ) );
7832 /* --- MMX decode failure --- */
7836 return delta
; /* ignored */
/*------------------------------------------------------------*/
/*--- More misc arithmetic and other obscure insns.        ---*/
/*------------------------------------------------------------*/

/* Generate base << amt with vacated places filled with stuff
   from xtra.  amt guaranteed in 0 .. 63. */
static 
IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt )
{
   /* if   amt == 0
      then base
      else (base << amt) | (xtra >>u (64-amt))
   */
   return
      IRExpr_ITE( 
         binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
         binop(Iop_Or64, 
               binop(Iop_Shl64, mkexpr(base), mkexpr(amt)),
               binop(Iop_Shr64, mkexpr(xtra), 
                                binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
               ),
         mkexpr(base)
      );
}

/* Generate base >>u amt with vacated places filled with stuff
   from xtra.  amt guaranteed in 0 .. 63. */
static 
IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt )
{
   /* if   amt == 0
      then base
      else (base >>u amt) | (xtra << (64-amt))
   */
   return
      IRExpr_ITE( 
         binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
         binop(Iop_Or64, 
               binop(Iop_Shr64, mkexpr(base), mkexpr(amt)),
               binop(Iop_Shl64, mkexpr(xtra), 
                                binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
               ),
         mkexpr(base)
      );
}
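/* Worked example (illustrative): with base = 0x00000000000000FF,
   xtra = 0x8000000000000000 and amt = 8, shiftL64_with_extras produces
   (base << 8) | (xtra >>u 56) = 0xFF00 | 0x80 = 0xFF80 -- the top bits of
   xtra slide into the vacated low bits.  When amt == 0 the ITE guard is
   false and base is returned unchanged, which also avoids the out-of-range
   shift by 64 that (64-amt) would otherwise produce. */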
7891 /* Double length left and right shifts. Apparently only required in
7892 v-size (no b- variant). */
7894 ULong
dis_SHLRD_Gv_Ev ( const VexAbiInfo
* vbi
,
7896 Long delta
, UChar modrm
,
7899 Bool amt_is_literal
,
7900 const HChar
* shift_amt_txt
,
7903 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
7904 for printing it. And eip on entry points at the modrm byte. */
7908 IRType ty
= szToITy(sz
);
7909 IRTemp gsrc
= newTemp(ty
);
7910 IRTemp esrc
= newTemp(ty
);
7911 IRTemp addr
= IRTemp_INVALID
;
7912 IRTemp tmpSH
= newTemp(Ity_I8
);
7913 IRTemp tmpSS
= newTemp(Ity_I8
);
7914 IRTemp tmp64
= IRTemp_INVALID
;
7915 IRTemp res64
= IRTemp_INVALID
;
7916 IRTemp rss64
= IRTemp_INVALID
;
7917 IRTemp resTy
= IRTemp_INVALID
;
7918 IRTemp rssTy
= IRTemp_INVALID
;
7919 Int mask
= sz
==8 ? 63 : 31;
7921 vassert(sz
== 2 || sz
== 4 || sz
== 8);
7923 /* The E-part is the destination; this is shifted. The G-part
7924 supplies bits to be shifted into the E-part, but is not
7927 If shifting left, form a double-length word with E at the top
7928 and G at the bottom, and shift this left. The result is then in
7931 If shifting right, form a double-length word with G at the top
7932 and E at the bottom, and shift this right. The result is then
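   /* Concrete illustration (not in the original): for a 32-bit shld with
      E = 0xAABBCCDD, G = 0x11223344 and shift amount 8, the double-length
      word is 0xAABBCCDD11223344; shifting it left by 8 and keeping the top
      32 bits yields 0xBBCCDD11, i.e. (E << 8) | (G >>u 24), which is the
      architecturally required result. */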
7935 /* Fetch the operands. */
7937 assign( gsrc
, getIRegG(sz
, pfx
, modrm
) );
7939 if (epartIsReg(modrm
)) {
7941 assign( esrc
, getIRegE(sz
, pfx
, modrm
) );
7942 DIP("sh%cd%c %s, %s, %s\n",
7943 ( left_shift
? 'l' : 'r' ), nameISize(sz
),
7945 nameIRegG(sz
, pfx
, modrm
), nameIRegE(sz
, pfx
, modrm
));
7947 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
,
7948 /* # bytes following amode */
7949 amt_is_literal
? 1 : 0 );
7951 assign( esrc
, loadLE(ty
, mkexpr(addr
)) );
7952 DIP("sh%cd%c %s, %s, %s\n",
7953 ( left_shift
? 'l' : 'r' ), nameISize(sz
),
7955 nameIRegG(sz
, pfx
, modrm
), dis_buf
);
7958 /* Calculate the masked shift amount (tmpSH), the masked subshift
7959 amount (tmpSS), the shifted value (res64) and the subshifted
7962 assign( tmpSH
, binop(Iop_And8
, shift_amt
, mkU8(mask
)) );
7963 assign( tmpSS
, binop(Iop_And8
,
7964 binop(Iop_Sub8
, mkexpr(tmpSH
), mkU8(1) ),
7967 tmp64
= newTemp(Ity_I64
);
7968 res64
= newTemp(Ity_I64
);
7969 rss64
= newTemp(Ity_I64
);
7971 if (sz
== 2 || sz
== 4) {
7973 /* G is xtra; E is data */
7974 /* what a freaking nightmare: */
7975 if (sz
== 4 && left_shift
) {
7976 assign( tmp64
, binop(Iop_32HLto64
, mkexpr(esrc
), mkexpr(gsrc
)) );
7979 binop(Iop_Shl64
, mkexpr(tmp64
), mkexpr(tmpSH
)),
7983 binop(Iop_Shl64
, mkexpr(tmp64
), mkexpr(tmpSS
)),
7987 if (sz
== 4 && !left_shift
) {
7988 assign( tmp64
, binop(Iop_32HLto64
, mkexpr(gsrc
), mkexpr(esrc
)) );
7989 assign( res64
, binop(Iop_Shr64
, mkexpr(tmp64
), mkexpr(tmpSH
)) );
7990 assign( rss64
, binop(Iop_Shr64
, mkexpr(tmp64
), mkexpr(tmpSS
)) );
7993 if (sz
== 2 && left_shift
) {
7996 binop(Iop_16HLto32
, mkexpr(esrc
), mkexpr(gsrc
)),
7997 binop(Iop_16HLto32
, mkexpr(gsrc
), mkexpr(gsrc
))
7999 /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
8002 binop(Iop_Shl64
, mkexpr(tmp64
), mkexpr(tmpSH
)),
8004 /* subshift formed by shifting [esrc'0000'0000'0000] */
8008 binop(Iop_Shl64
, unop(Iop_16Uto64
, mkexpr(esrc
)),
8014 if (sz
== 2 && !left_shift
) {
8017 binop(Iop_16HLto32
, mkexpr(gsrc
), mkexpr(gsrc
)),
8018 binop(Iop_16HLto32
, mkexpr(gsrc
), mkexpr(esrc
))
8020 /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
8021 assign( res64
, binop(Iop_Shr64
, mkexpr(tmp64
), mkexpr(tmpSH
)) );
8022 /* subshift formed by shifting [0000'0000'0000'esrc] */
8023 assign( rss64
, binop(Iop_Shr64
,
8024 unop(Iop_16Uto64
, mkexpr(esrc
)),
8032 assign( res64
, shiftL64_with_extras( esrc
, gsrc
, tmpSH
));
8033 assign( rss64
, shiftL64_with_extras( esrc
, gsrc
, tmpSS
));
8035 assign( res64
, shiftR64_with_extras( gsrc
, esrc
, tmpSH
));
8036 assign( rss64
, shiftR64_with_extras( gsrc
, esrc
, tmpSS
));
8041 resTy
= newTemp(ty
);
8042 rssTy
= newTemp(ty
);
8043 assign( resTy
, narrowTo(ty
, mkexpr(res64
)) );
8044 assign( rssTy
, narrowTo(ty
, mkexpr(rss64
)) );
8046 /* Put result back and write the flags thunk. */
8047 setFlags_DEP1_DEP2_shift ( left_shift
? Iop_Shl64
: Iop_Sar64
,
8048 resTy
, rssTy
, ty
, tmpSH
);
8050 if (epartIsReg(modrm
)) {
8051 putIRegE(sz
, pfx
, modrm
, mkexpr(resTy
));
8053 storeLE( mkexpr(addr
), mkexpr(resTy
) );
8056 if (amt_is_literal
) delta
++;
/* Handle BT/BTS/BTR/BTC Gv, Ev.  Apparently b-size is not
   encodable. */

typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;

static const HChar* nameBtOp ( BtOp op )
{
   switch (op) {
      case BtOpNone:  return "";
      case BtOpSet:   return "s";
      case BtOpReset: return "r";
      case BtOpComp:  return "c";
      default: vpanic("nameBtOp(amd64)");
   }
}
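/* Sketch (illustrative) of how dis_bt_G_E below reduces all four BTx forms
   to a single byte-wide access, once the bit number has been masked where
   required:

      addr1 = addr0 + (bitno >>s 3)    -- address of the byte holding the bit
      mask  = 1 << (bitno & 7)         -- that bit within the byte
      BTS: new = old | mask   BTR: new = old & ~mask   BTC: new = old ^ mask

   and the carry flag is afterwards derived from the fetched (old) byte,
   shifted right by (bitno & 7). */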
8079 ULong
dis_bt_G_E ( const VexAbiInfo
* vbi
,
8080 Prefix pfx
, Int sz
, Long delta
, BtOp op
,
8081 /*OUT*/Bool
* decode_OK
)
8086 IRTemp t_fetched
, t_bitno0
, t_bitno1
, t_bitno2
, t_addr0
,
8087 t_addr1
, t_rsp
, t_mask
, t_new
;
8089 vassert(sz
== 2 || sz
== 4 || sz
== 8);
8091 t_fetched
= t_bitno0
= t_bitno1
= t_bitno2
8092 = t_addr0
= t_addr1
= t_rsp
8093 = t_mask
= t_new
= IRTemp_INVALID
;
8095 t_fetched
= newTemp(Ity_I8
);
8096 t_new
= newTemp(Ity_I8
);
8097 t_bitno0
= newTemp(Ity_I64
);
8098 t_bitno1
= newTemp(Ity_I64
);
8099 t_bitno2
= newTemp(Ity_I8
);
8100 t_addr1
= newTemp(Ity_I64
);
8101 modrm
= getUChar(delta
);
8104 if (epartIsReg(modrm
)) {
8105 /* F2 and F3 are never acceptable. */
8106 if (haveF2orF3(pfx
)) {
8111 /* F2 or F3 (but not both) are allowed, provided LOCK is also
8112 present, and only for the BTC/BTS/BTR cases (not BT). */
8113 if (haveF2orF3(pfx
)) {
8114 if (haveF2andF3(pfx
) || !haveLOCK(pfx
) || op
== BtOpNone
) {
8121 assign( t_bitno0
, widenSto64(getIRegG(sz
, pfx
, modrm
)) );
8123 if (epartIsReg(modrm
)) {
8125 /* Get it onto the client's stack. Oh, this is a horrible
8126 kludge. See https://bugs.kde.org/show_bug.cgi?id=245925.
8127 Because of the ELF ABI stack redzone, there may be live data
8128 up to 128 bytes below %RSP. So we can't just push it on the
8129 stack, else we may wind up trashing live data, and causing
8130 impossible-to-find simulation errors. (Yes, this did
8131 happen.) So we need to drop RSP before at least 128 before
8132 pushing it. That unfortunately means hitting Memcheck's
8133 fast-case painting code. Ideally we should drop more than
8134 128, to reduce the chances of breaking buggy programs that
8135 have live data below -128(%RSP). Memcheck fast-cases moves
8136 of 288 bytes due to the need to handle ppc64-linux quickly,
8137 so let's use 288. Of course the real fix is to get rid of
8138 this kludge entirely. */
8139 t_rsp
= newTemp(Ity_I64
);
8140 t_addr0
= newTemp(Ity_I64
);
8142 vassert(vbi
->guest_stack_redzone_size
== 128);
8143 assign( t_rsp
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(288)) );
8144 putIReg64(R_RSP
, mkexpr(t_rsp
));
8146 storeLE( mkexpr(t_rsp
), getIRegE(sz
, pfx
, modrm
) );
8148 /* Make t_addr0 point at it. */
8149 assign( t_addr0
, mkexpr(t_rsp
) );
8151 /* Mask out upper bits of the shift amount, since we're doing a
8153 assign( t_bitno1
, binop(Iop_And64
,
8155 mkU64(sz
== 8 ? 63 : sz
== 4 ? 31 : 15)) );
8158 t_addr0
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
8160 assign( t_bitno1
, mkexpr(t_bitno0
) );
8163 /* At this point: t_addr0 is the address being operated on. If it
8164 was a reg, we will have pushed it onto the client's stack.
8165 t_bitno1 is the bit number, suitably masked in the case of a
8168 /* Now the main sequence. */
8172 binop(Iop_Sar64
, mkexpr(t_bitno1
), mkU8(3))) );
8174 /* t_addr1 now holds effective address */
8178 binop(Iop_And64
, mkexpr(t_bitno1
), mkU64(7))) );
8180 /* t_bitno2 contains offset of bit within byte */
8182 if (op
!= BtOpNone
) {
8183 t_mask
= newTemp(Ity_I8
);
8184 assign( t_mask
, binop(Iop_Shl8
, mkU8(1), mkexpr(t_bitno2
)) );
8187 /* t_mask is now a suitable byte mask */
8189 assign( t_fetched
, loadLE(Ity_I8
, mkexpr(t_addr1
)) );
8191 if (op
!= BtOpNone
) {
8195 binop(Iop_Or8
, mkexpr(t_fetched
), mkexpr(t_mask
)) );
8199 binop(Iop_Xor8
, mkexpr(t_fetched
), mkexpr(t_mask
)) );
8203 binop(Iop_And8
, mkexpr(t_fetched
),
8204 unop(Iop_Not8
, mkexpr(t_mask
))) );
8207 vpanic("dis_bt_G_E(amd64)");
8209 if ((haveLOCK(pfx
)) && !epartIsReg(modrm
)) {
8210 casLE( mkexpr(t_addr1
), mkexpr(t_fetched
)/*expd*/,
8211 mkexpr(t_new
)/*new*/,
8212 guest_RIP_curr_instr
);
8214 storeLE( mkexpr(t_addr1
), mkexpr(t_new
) );
8218 /* Side effect done; now get selected bit into Carry flag. The Intel docs
8219 (as of 2015, at least) say that C holds the result, Z is unchanged, and
8220 O,S,A and P are undefined. However, on Skylake it appears that O,S,A,P
8221 are also unchanged, so let's do that. */
8222 const ULong maskC
= AMD64G_CC_MASK_C
;
8223 const ULong maskOSZAP
= AMD64G_CC_MASK_O
| AMD64G_CC_MASK_S
8224 | AMD64G_CC_MASK_Z
| AMD64G_CC_MASK_A
8227 IRTemp old_rflags
= newTemp(Ity_I64
);
8228 assign(old_rflags
, mk_amd64g_calculate_rflags_all());
8230 IRTemp new_rflags
= newTemp(Ity_I64
);
8233 binop(Iop_And64
, mkexpr(old_rflags
), mkU64(maskOSZAP
)),
8236 unop(Iop_8Uto64
, mkexpr(t_fetched
)),
8240 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
8241 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
8242 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(new_rflags
) ));
8243 /* Set NDEP even though it isn't used. This makes redundant-PUT
8244 elimination of previous stores to this field work better. */
8245 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
8247 /* Move reg operand from stack back to reg */
8248 if (epartIsReg(modrm
)) {
8249 /* t_rsp still points at it. */
8250 /* only write the reg if actually modifying it; doing otherwise
8251 zeroes the top half erroneously when doing btl due to
8252 standard zero-extend rule */
8254 putIRegE(sz
, pfx
, modrm
, loadLE(szToITy(sz
), mkexpr(t_rsp
)) );
8255 putIReg64(R_RSP
, binop(Iop_Add64
, mkexpr(t_rsp
), mkU64(288)) );
8258 DIP("bt%s%c %s, %s\n",
8259 nameBtOp(op
), nameISize(sz
), nameIRegG(sz
, pfx
, modrm
),
8260 ( epartIsReg(modrm
) ? nameIRegE(sz
, pfx
, modrm
) : dis_buf
) );
8267 /* Handle BSF/BSR. Only v-size seems necessary. */
8269 ULong
dis_bs_E_G ( const VexAbiInfo
* vbi
,
8270 Prefix pfx
, Int sz
, Long delta
, Bool fwds
)
8276 IRType ty
= szToITy(sz
);
8277 IRTemp src
= newTemp(ty
);
8278 IRTemp dst
= newTemp(ty
);
8279 IRTemp src64
= newTemp(Ity_I64
);
8280 IRTemp dst64
= newTemp(Ity_I64
);
8281 IRTemp srcB
= newTemp(Ity_I1
);
8283 vassert(sz
== 8 || sz
== 4 || sz
== 2);
8285 modrm
= getUChar(delta
);
8286 isReg
= epartIsReg(modrm
);
8289 assign( src
, getIRegE(sz
, pfx
, modrm
) );
8292 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
8294 assign( src
, loadLE(ty
, mkexpr(addr
)) );
8297 DIP("bs%c%c %s, %s\n",
8298 fwds
? 'f' : 'r', nameISize(sz
),
8299 ( isReg
? nameIRegE(sz
, pfx
, modrm
) : dis_buf
),
8300 nameIRegG(sz
, pfx
, modrm
));
8302 /* First, widen src to 64 bits if it is not already. */
8303 assign( src64
, widenUto64(mkexpr(src
)) );
8305 /* Generate a bool expression which is zero iff the original is
8306 zero, and nonzero otherwise. Ask for a CmpNE version which, if
8307 instrumented by Memcheck, is instrumented expensively, since
8308 this may be used on the output of a preceding movmskb insn,
8309 which has been known to be partially defined, and in need of
8310 careful handling. */
8311 assign( srcB
, binop(Iop_ExpCmpNE64
, mkexpr(src64
), mkU64(0)) );
8313 /* Flags: Z is 1 iff source value is zero. All others
8314 are undefined -- we force them to zero. */
8315 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
8316 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
8319 IRExpr_ITE( mkexpr(srcB
),
8323 mkU64(AMD64G_CC_MASK_Z
)
8326 /* Set NDEP even though it isn't used. This makes redundant-PUT
8327 elimination of previous stores to this field work better. */
8328 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
8330 /* Result: iff source value is zero, we can't use
8331 Iop_Clz64/Iop_Ctz64 as they have no defined result in that case.
8332 But anyway, amd64 semantics say the result is undefined in
8333 such situations. Hence handle the zero case specially. */
8335 /* Bleh. What we compute:
8337 bsf64: if src == 0 then {dst is unchanged}
8340 bsr64: if src == 0 then {dst is unchanged}
8341 else 63 - Clz64(src)
8343 bsf32: if src == 0 then {dst is unchanged}
8344 else Ctz64(32Uto64(src))
8346 bsr32: if src == 0 then {dst is unchanged}
8347 else 63 - Clz64(32Uto64(src))
8349 bsf16: if src == 0 then {dst is unchanged}
8350 else Ctz64(32Uto64(16Uto32(src)))
8352 bsr16: if src == 0 then {dst is unchanged}
8353 else 63 - Clz64(32Uto64(16Uto32(src)))
8356 /* The main computation, guarding against zero. */
8361 fwds
? unop(Iop_Ctz64
, mkexpr(src64
))
8364 unop(Iop_Clz64
, mkexpr(src64
))),
8365 /* src == 0 -- leave dst unchanged */
8366 widenUto64( getIRegG( sz
, pfx
, modrm
) )
8371 assign( dst
, unop(Iop_64to16
, mkexpr(dst64
)) );
8374 assign( dst
, unop(Iop_64to32
, mkexpr(dst64
)) );
8376 assign( dst
, mkexpr(dst64
) );
8378 /* dump result back */
8379 putIRegG( sz
, pfx
, modrm
, mkexpr(dst
) );
/* swap rAX with the reg specified by reg and REX.B */
static 
void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 )
{
   IRType ty = szToITy(sz);
   IRTemp t1 = newTemp(ty);
   IRTemp t2 = newTemp(ty);
   vassert(sz == 2 || sz == 4 || sz == 8);
   vassert(regLo3 < 8);
   if (sz == 8) {
      assign( t1, getIReg64(R_RAX) );
      assign( t2, getIRegRexB(8, pfx, regLo3) );
      putIReg64( R_RAX, mkexpr(t2) );
      putIRegRexB(8, pfx, regLo3, mkexpr(t1) );
   } else if (sz == 4) {
      assign( t1, getIReg32(R_RAX) );
      assign( t2, getIRegRexB(4, pfx, regLo3) );
      putIReg32( R_RAX, mkexpr(t2) );
      putIRegRexB(4, pfx, regLo3, mkexpr(t1) );
   } else {
      assign( t1, getIReg16(R_RAX) );
      assign( t2, getIRegRexB(2, pfx, regLo3) );
      putIReg16( R_RAX, mkexpr(t2) );
      putIRegRexB(2, pfx, regLo3, mkexpr(t1) );
   }
   DIP("xchg%c %s, %s\n",
       nameISize(sz), nameIRegRAX(sz),
                      nameIRegRexB(sz,pfx, regLo3));
}
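/* Example (illustrative): for "xchg %rbx,%rax" we have sz == 8 and
   regLo3 == 3 with no REX.B, so the sequence above reads RAX into t1 and
   RBX into t2, then writes t2 to RAX and t1 to RBX -- a pure swap with no
   effect on the flags thunk. */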
static 
void codegen_SAHF ( void )
{
   /* Set the flags to:
      (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O) 
                                    -- retain the old O flag
      | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
                |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C)
   */
   ULong  mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
                       |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
   IRTemp oldflags   = newTemp(Ity_I64);
   assign( oldflags, mk_amd64g_calculate_rflags_all() );
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1,
         binop(Iop_Or64,
               binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)),
               binop(Iop_And64,
                     binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)),
                     mkU64(mask_SZACP))
              )
   ));
}
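/* Worked example (illustrative, assuming the AMD64G_CC_MASK_* constants
   mirror the rflags bit positions): mask_SZACP is 0x80|0x40|0x10|0x04|0x01
   = 0xD5, so with %ah == 0xFF the new DEP1 is (old rflags & O-bit) | 0xD5:
   S, Z, A, P and C all become set, O is preserved, everything else is
   cleared. */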
static 
void codegen_LAHF ( void )
{
   /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
   IRExpr* rax_with_hole;
   IRExpr* new_byte;
   IRExpr* new_rax;

   ULong   mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
                        |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;

   IRTemp  flags = newTemp(Ity_I64);
   assign( flags, mk_amd64g_calculate_rflags_all() );

   rax_with_hole 
      = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL));
   new_byte 
      = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)),
                        mkU64(1<<1));
   new_rax 
      = binop(Iop_Or64, rax_with_hole,
                        binop(Iop_Shl64, new_byte, mkU8(8)));
   putIReg64(R_RAX, new_rax);
}
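/* Illustrative counterpart to the SAHF example: with S, Z, A, P and C all
   clear, the byte written to %ah above contains only the always-one bit 1,
   i.e. 0x02, matching the layout SF:ZF:0:AF:0:PF:1:CF given in the comment. */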
8469 ULong
dis_cmpxchg_G_E ( /*OUT*/Bool
* ok
,
8470 const VexAbiInfo
* vbi
,
8478 IRType ty
= szToITy(size
);
8479 IRTemp acc
= newTemp(ty
);
8480 IRTemp src
= newTemp(ty
);
8481 IRTemp dest
= newTemp(ty
);
8482 IRTemp dest2
= newTemp(ty
);
8483 IRTemp acc2
= newTemp(ty
);
8484 IRTemp cond
= newTemp(Ity_I1
);
8485 IRTemp addr
= IRTemp_INVALID
;
8486 UChar rm
= getUChar(delta0
);
8488 /* There are 3 cases to consider:
8490 reg-reg: ignore any lock prefix, generate sequence based
8493 reg-mem, not locked: ignore any lock prefix, generate sequence
8496 reg-mem, locked: use IRCAS
8499 /* Decide whether F2 or F3 are acceptable. Never for register
8500 case, but for the memory case, one or the other is OK provided
8501 LOCK is also present. */
8502 if (epartIsReg(rm
)) {
8503 if (haveF2orF3(pfx
)) {
8508 if (haveF2orF3(pfx
)) {
8509 if (haveF2andF3(pfx
) || !haveLOCK(pfx
)) {
8516 if (epartIsReg(rm
)) {
8518 assign( dest
, getIRegE(size
, pfx
, rm
) );
8520 assign( src
, getIRegG(size
, pfx
, rm
) );
8521 assign( acc
, getIRegRAX(size
) );
8522 setFlags_DEP1_DEP2(Iop_Sub8
, acc
, dest
, ty
);
8523 assign( cond
, mk_amd64g_calculate_condition(AMD64CondZ
) );
8524 assign( dest2
, IRExpr_ITE(mkexpr(cond
), mkexpr(src
), mkexpr(dest
)) );
8525 assign( acc2
, IRExpr_ITE(mkexpr(cond
), mkexpr(acc
), mkexpr(dest
)) );
8526 putIRegRAX(size
, mkexpr(acc2
));
8527 putIRegE(size
, pfx
, rm
, mkexpr(dest2
));
8528 DIP("cmpxchg%c %s,%s\n", nameISize(size
),
8529 nameIRegG(size
,pfx
,rm
),
8530 nameIRegE(size
,pfx
,rm
) );
8532 else if (!epartIsReg(rm
) && !haveLOCK(pfx
)) {
8534 addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
8535 assign( dest
, loadLE(ty
, mkexpr(addr
)) );
8537 assign( src
, getIRegG(size
, pfx
, rm
) );
8538 assign( acc
, getIRegRAX(size
) );
8539 setFlags_DEP1_DEP2(Iop_Sub8
, acc
, dest
, ty
);
8540 assign( cond
, mk_amd64g_calculate_condition(AMD64CondZ
) );
8541 assign( dest2
, IRExpr_ITE(mkexpr(cond
), mkexpr(src
), mkexpr(dest
)) );
8542 assign( acc2
, IRExpr_ITE(mkexpr(cond
), mkexpr(acc
), mkexpr(dest
)) );
8543 putIRegRAX(size
, mkexpr(acc2
));
8544 storeLE( mkexpr(addr
), mkexpr(dest2
) );
8545 DIP("cmpxchg%c %s,%s\n", nameISize(size
),
8546 nameIRegG(size
,pfx
,rm
), dis_buf
);
8548 else if (!epartIsReg(rm
) && haveLOCK(pfx
)) {
8550 /* src is new value. acc is expected value. dest is old value.
8551 Compute success from the output of the IRCAS, and steer the
8552 new value for RAX accordingly: in case of success, RAX is
8554 addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
8556 assign( src
, getIRegG(size
, pfx
, rm
) );
8557 assign( acc
, getIRegRAX(size
) );
8559 mkIRCAS( IRTemp_INVALID
, dest
, Iend_LE
, mkexpr(addr
),
8560 NULL
, mkexpr(acc
), NULL
, mkexpr(src
) )
8562 setFlags_DEP1_DEP2(Iop_Sub8
, acc
, dest
, ty
);
8563 assign( cond
, mk_amd64g_calculate_condition(AMD64CondZ
) );
8564 assign( acc2
, IRExpr_ITE(mkexpr(cond
), mkexpr(acc
), mkexpr(dest
)) );
8565 putIRegRAX(size
, mkexpr(acc2
));
8566 DIP("cmpxchg%c %s,%s\n", nameISize(size
),
8567 nameIRegG(size
,pfx
,rm
), dis_buf
);
8576 /* Handle conditional move instructions of the form
8577 cmovcc E(reg-or-mem), G(reg)
8579 E(src) is reg-or-mem
8582 If E is reg, --> GET %E, tmps
8587 If E is mem --> (getAddr E) -> tmpa
8594 ULong
dis_cmov_E_G ( const VexAbiInfo
* vbi
,
8600 UChar rm
= getUChar(delta0
);
8604 IRType ty
= szToITy(sz
);
8605 IRTemp tmps
= newTemp(ty
);
8606 IRTemp tmpd
= newTemp(ty
);
8608 if (epartIsReg(rm
)) {
8609 assign( tmps
, getIRegE(sz
, pfx
, rm
) );
8610 assign( tmpd
, getIRegG(sz
, pfx
, rm
) );
8612 putIRegG( sz
, pfx
, rm
,
8613 IRExpr_ITE( mk_amd64g_calculate_condition(cond
),
8617 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond
),
8618 nameIRegE(sz
,pfx
,rm
),
8619 nameIRegG(sz
,pfx
,rm
));
8623 /* E refers to memory */
8625 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
8626 assign( tmps
, loadLE(ty
, mkexpr(addr
)) );
8627 assign( tmpd
, getIRegG(sz
, pfx
, rm
) );
8629 putIRegG( sz
, pfx
, rm
,
8630 IRExpr_ITE( mk_amd64g_calculate_condition(cond
),
8635 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond
),
8637 nameIRegG(sz
,pfx
,rm
));
8644 ULong
dis_xadd_G_E ( /*OUT*/Bool
* decode_ok
,
8645 const VexAbiInfo
* vbi
,
8646 Prefix pfx
, Int sz
, Long delta0
)
8649 UChar rm
= getUChar(delta0
);
8652 IRType ty
= szToITy(sz
);
8653 IRTemp tmpd
= newTemp(ty
);
8654 IRTemp tmpt0
= newTemp(ty
);
8655 IRTemp tmpt1
= newTemp(ty
);
8657 /* There are 3 cases to consider:
8659 reg-reg: ignore any lock prefix,
8660 generate 'naive' (non-atomic) sequence
8662 reg-mem, not locked: ignore any lock prefix, generate 'naive'
8663 (non-atomic) sequence
8665 reg-mem, locked: use IRCAS
8668 if (epartIsReg(rm
)) {
8670 assign( tmpd
, getIRegE(sz
, pfx
, rm
) );
8671 assign( tmpt0
, getIRegG(sz
, pfx
, rm
) );
8672 assign( tmpt1
, binop(mkSizedOp(ty
,Iop_Add8
),
8673 mkexpr(tmpd
), mkexpr(tmpt0
)) );
8674 setFlags_DEP1_DEP2( Iop_Add8
, tmpd
, tmpt0
, ty
);
8675 putIRegG(sz
, pfx
, rm
, mkexpr(tmpd
));
8676 putIRegE(sz
, pfx
, rm
, mkexpr(tmpt1
));
8677 DIP("xadd%c %s, %s\n",
8678 nameISize(sz
), nameIRegG(sz
,pfx
,rm
), nameIRegE(sz
,pfx
,rm
));
8682 else if (!epartIsReg(rm
) && !haveLOCK(pfx
)) {
8684 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
8685 assign( tmpd
, loadLE(ty
, mkexpr(addr
)) );
8686 assign( tmpt0
, getIRegG(sz
, pfx
, rm
) );
8687 assign( tmpt1
, binop(mkSizedOp(ty
,Iop_Add8
),
8688 mkexpr(tmpd
), mkexpr(tmpt0
)) );
8689 setFlags_DEP1_DEP2( Iop_Add8
, tmpd
, tmpt0
, ty
);
8690 storeLE( mkexpr(addr
), mkexpr(tmpt1
) );
8691 putIRegG(sz
, pfx
, rm
, mkexpr(tmpd
));
8692 DIP("xadd%c %s, %s\n",
8693 nameISize(sz
), nameIRegG(sz
,pfx
,rm
), dis_buf
);
8697 else if (!epartIsReg(rm
) && haveLOCK(pfx
)) {
8699 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
8700 assign( tmpd
, loadLE(ty
, mkexpr(addr
)) );
8701 assign( tmpt0
, getIRegG(sz
, pfx
, rm
) );
8702 assign( tmpt1
, binop(mkSizedOp(ty
,Iop_Add8
),
8703 mkexpr(tmpd
), mkexpr(tmpt0
)) );
8704 casLE( mkexpr(addr
), mkexpr(tmpd
)/*expVal*/,
8705 mkexpr(tmpt1
)/*newVal*/, guest_RIP_curr_instr
);
8706 setFlags_DEP1_DEP2( Iop_Add8
, tmpd
, tmpt0
, ty
);
8707 putIRegG(sz
, pfx
, rm
, mkexpr(tmpd
));
8708 DIP("xadd%c %s, %s\n",
8709 nameISize(sz
), nameIRegG(sz
,pfx
,rm
), dis_buf
);
8717 //.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
8720 //.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 )
8724 //.. UChar rm = getUChar(delta0);
8725 //.. HChar dis_buf[50];
8727 //.. if (epartIsReg(rm)) {
8728 //.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
8729 //.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
8730 //.. return 1+delta0;
8732 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8733 //.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
8734 //.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
8735 //.. return len+delta0;
8739 //.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
8740 //.. dst is ireg and sz==4, zero out top half of it. */
8743 //.. UInt dis_mov_Sw_Ew ( UChar sorb,
8749 //.. UChar rm = getUChar(delta0);
8750 //.. HChar dis_buf[50];
8752 //.. vassert(sz == 2 || sz == 4);
8754 //.. if (epartIsReg(rm)) {
8756 //.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
8758 //.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
8760 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
8761 //.. return 1+delta0;
8763 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8764 //.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
8765 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
8766 //.. return len+delta0;
8770 /* Handle move instructions of the form
8772 mov sreg, reg-or-mem
8773 Is passed the a ptr to the modRM byte, and the data size. Returns
8774 the address advanced completely over this instruction.
8776 VEX does not currently simulate segment registers on AMD64 which means that
8777 instead of moving a value of a segment register, zero is moved to the
8778 destination. The zero value represents a null (unused) selector. This is
8779 not correct (especially for the %cs, %fs and %gs registers) but it seems to
8780 provide a sufficient simulation for currently seen programs that use this
8781 instruction. If some program actually decides to use the obtained segment
8782 selector for something meaningful then the zero value should be a clear
8783 indicator that there is some problem.
8786 E(dst) is reg-or-mem
8788 If E is reg, --> PUT $0, %E
8790 If E is mem, --> (getAddr E) -> tmpa
8794 ULong
dis_mov_S_E ( const VexAbiInfo
* vbi
,
8800 UChar rm
= getUChar(delta0
);
8803 if (epartIsReg(rm
)) {
8804 putIRegE(size
, pfx
, rm
, mkU(szToITy(size
), 0));
8805 DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx
, rm
)),
8806 nameIRegE(size
, pfx
, rm
));
8810 /* E refers to memory */
8812 IRTemp addr
= disAMode(&len
, vbi
, pfx
, delta0
, dis_buf
, 0);
8813 storeLE(mkexpr(addr
), mkU16(0));
8814 DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx
, rm
)),
8821 //.. void dis_push_segreg ( UInt sreg, Int sz )
8823 //.. IRTemp t1 = newTemp(Ity_I16);
8824 //.. IRTemp ta = newTemp(Ity_I32);
8825 //.. vassert(sz == 2 || sz == 4);
8827 //.. assign( t1, getSReg(sreg) );
8828 //.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
8829 //.. putIReg(4, R_ESP, mkexpr(ta));
8830 //.. storeLE( mkexpr(ta), mkexpr(t1) );
8832 //.. DIP("pushw %s\n", nameSReg(sreg));
8836 //.. void dis_pop_segreg ( UInt sreg, Int sz )
8838 //.. IRTemp t1 = newTemp(Ity_I16);
8839 //.. IRTemp ta = newTemp(Ity_I32);
8840 //.. vassert(sz == 2 || sz == 4);
8842 //.. assign( ta, getIReg(4, R_ESP) );
8843 //.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
8845 //.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
8846 //.. putSReg( sreg, mkexpr(t1) );
8847 //.. DIP("pop %s\n", nameSReg(sreg));
static 
void dis_ret ( /*MOD*/DisResult* dres, const VexAbiInfo* vbi, ULong d64 )
{
   IRTemp t1 = newTemp(Ity_I64); 
   IRTemp t2 = newTemp(Ity_I64);
   IRTemp t3 = newTemp(Ity_I64);
   assign(t1, getIReg64(R_RSP));
   assign(t2, loadLE(Ity_I64,mkexpr(t1)));
   assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64)));
   putIReg64(R_RSP, mkexpr(t3));
   make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret");
   jmp_treg(dres, Ijk_Ret, t2);
   vassert(dres->whatNext == Dis_StopHere);
}
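/* Illustrative: for "ret $16" the caller passes d64 == 16, so the code
   above fetches the return address from [RSP], advances RSP by 8 + 16 = 24
   (the return address plus the callee-popped argument bytes), emits the
   red-zone AbiHint and jumps to the fetched address. */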
/*------------------------------------------------------------*/
/*--- SSE/SSE2/SSE3 helpers                                ---*/
/*------------------------------------------------------------*/

/* Indicates whether the op requires a rounding-mode argument.  Note
   that this covers only vector floating point arithmetic ops, and
   omits the scalar ones that need rounding modes.  Note also that 
   inconsistencies here will get picked up later by the IR sanity
   checker, so this isn't correctness-critical. */
static Bool requiresRMode ( IROp op )
{
   switch (op) {
      /* 128 bit ops */
      case Iop_Add32Fx4: case Iop_Sub32Fx4:
      case Iop_Mul32Fx4: case Iop_Div32Fx4:
      case Iop_Add64Fx2: case Iop_Sub64Fx2:
      case Iop_Mul64Fx2: case Iop_Div64Fx2:
      /* 256 bit ops */
      case Iop_Add32Fx8: case Iop_Sub32Fx8:
      case Iop_Mul32Fx8: case Iop_Div32Fx8:
      case Iop_Add64Fx4: case Iop_Sub64Fx4:
      case Iop_Mul64Fx4: case Iop_Div64Fx4:
         return True;
      default:
         break;
   }
   return False;
}
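/* Note (illustrative): when requiresRMode(op) holds, the worker below
   builds a triop whose first argument is get_FAKE_roundingmode(), i.e.
   round-to-nearest is assumed regardless of the guest MXCSR -- see the
   XXXROUNDINGFIXME markers nearby. */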
8896 /* Worker function; do not call directly.
8897 Handles full width G = G `op` E and G = (not G) `op` E.
8900 static ULong
dis_SSE_E_to_G_all_wrk (
8901 const VexAbiInfo
* vbi
,
8902 Prefix pfx
, Long delta
,
8903 const HChar
* opname
, IROp op
,
8910 UChar rm
= getUChar(delta
);
8911 Bool needsRMode
= requiresRMode(op
);
8913 = invertG
? unop(Iop_NotV128
, getXMMReg(gregOfRexRM(pfx
,rm
)))
8914 : getXMMReg(gregOfRexRM(pfx
,rm
));
8915 if (epartIsReg(rm
)) {
8917 gregOfRexRM(pfx
,rm
),
8919 ? triop(op
, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
8921 getXMMReg(eregOfRexRM(pfx
,rm
)))
8923 getXMMReg(eregOfRexRM(pfx
,rm
)))
8925 DIP("%s %s,%s\n", opname
,
8926 nameXMMReg(eregOfRexRM(pfx
,rm
)),
8927 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
8930 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
8932 gregOfRexRM(pfx
,rm
),
8934 ? triop(op
, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
8936 loadLE(Ity_V128
, mkexpr(addr
)))
8938 loadLE(Ity_V128
, mkexpr(addr
)))
8940 DIP("%s %s,%s\n", opname
,
8942 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
/* All lanes SSE binary operation, G = G `op` E. */

static
ULong dis_SSE_E_to_G_all ( const VexAbiInfo* vbi,
                           Prefix pfx, Long delta, 
                           const HChar* opname, IROp op )
{
   return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False );
}

/* All lanes SSE binary operation, G = (not G) `op` E. */

static
ULong dis_SSE_E_to_G_all_invG ( const VexAbiInfo* vbi,
                                Prefix pfx, Long delta, 
                                const HChar* opname, IROp op )
{
   return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True );
}
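/* Typical usage (a sketch; the real call sites appear later in the main
   decode loop): an ADDPS decode would do something like

      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addps", Iop_Add32Fx4 );

   while ANDNPS would go through dis_SSE_E_to_G_all_invG with Iop_AndV128,
   since that insn computes (not G) & E. */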
8969 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
8971 static ULong dis_SSE_E_to_G_lo32 ( const VexAbiInfo* vbi,
8972                                    Prefix pfx, Long delta,
8973                                    const HChar* opname, IROp op )
8978    UChar rm = getUChar(delta);
8979    IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
8980    if (epartIsReg(rm)) {
8981       putXMMReg( gregOfRexRM(pfx,rm),
8983                  getXMMReg(eregOfRexRM(pfx,rm))) );
8984       DIP("%s %s,%s\n", opname,
8985                         nameXMMReg(eregOfRexRM(pfx,rm)),
8986                         nameXMMReg(gregOfRexRM(pfx,rm)) );
8989       /* We can only do a 32-bit memory read, so the upper 3/4 of the
8990          E operand needs to be made simply of zeroes. */
8991       IRTemp epart = newTemp(Ity_V128);
8992       addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
8993       assign( epart, unop( Iop_32UtoV128,
8994                            loadLE(Ity_I32, mkexpr(addr))) );
8995       putXMMReg( gregOfRexRM(pfx,rm),
8996                  binop(op, gpart, mkexpr(epart)) );
8997       DIP("%s %s,%s\n", opname,
8999                         nameXMMReg(gregOfRexRM(pfx,rm)) );
9005 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
9007 static ULong dis_SSE_E_to_G_lo64 ( const VexAbiInfo* vbi,
9008                                    Prefix pfx, Long delta,
9009                                    const HChar* opname, IROp op )
9014    UChar rm = getUChar(delta);
9015    IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
9016    if (epartIsReg(rm)) {
9017       putXMMReg( gregOfRexRM(pfx,rm),
9019                  getXMMReg(eregOfRexRM(pfx,rm))) );
9020       DIP("%s %s,%s\n", opname,
9021                         nameXMMReg(eregOfRexRM(pfx,rm)),
9022                         nameXMMReg(gregOfRexRM(pfx,rm)) );
9025       /* We can only do a 64-bit memory read, so the upper half of the
9026          E operand needs to be made simply of zeroes. */
9027       IRTemp epart = newTemp(Ity_V128);
9028       addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9029       assign( epart, unop( Iop_64UtoV128,
9030                            loadLE(Ity_I64, mkexpr(addr))) );
9031       putXMMReg( gregOfRexRM(pfx,rm),
9032                  binop(op, gpart, mkexpr(epart)) );
9033       DIP("%s %s,%s\n", opname,
9035                         nameXMMReg(gregOfRexRM(pfx,rm)) );
9041 /* All lanes unary SSE operation, G = op(E). */
9043 static ULong dis_SSE_E_to_G_unary_all (
9044                 const VexAbiInfo* vbi,
9045                 Prefix pfx, Long delta,
9046                 const HChar* opname, IROp op
9052    UChar rm = getUChar(delta);
9053    // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
9054    // up in the usual way.
9055    Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
9056    if (epartIsReg(rm)) {
9057       IRExpr* src = getXMMReg(eregOfRexRM(pfx,rm));
9058       /* XXXROUNDINGFIXME */
9059       IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
9061       putXMMReg( gregOfRexRM(pfx,rm), res );
9062       DIP("%s %s,%s\n", opname,
9063                         nameXMMReg(eregOfRexRM(pfx,rm)),
9064                         nameXMMReg(gregOfRexRM(pfx,rm)) );
9067       addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9068       IRExpr* src = loadLE(Ity_V128, mkexpr(addr));
9069       /* XXXROUNDINGFIXME */
9070       IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
9072       putXMMReg( gregOfRexRM(pfx,rm), res );
9073       DIP("%s %s,%s\n", opname,
9075                         nameXMMReg(gregOfRexRM(pfx,rm)) );
9081 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */
9083 static ULong dis_SSE_E_to_G_unary_lo32 (
9084                 const VexAbiInfo* vbi,
9085                 Prefix pfx, Long delta,
9086                 const HChar* opname, IROp op
9089    /* First we need to get the old G value and patch the low 32 bits
9090       of the E operand into it.  Then apply op and write back to G. */
9094    UChar rm = getUChar(delta);
9095    IRTemp oldG0 = newTemp(Ity_V128);
9096    IRTemp oldG1 = newTemp(Ity_V128);
9098    assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
9100    if (epartIsReg(rm)) {
9102               binop( Iop_SetV128lo32,
9104                      getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) );
9105       putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9106       DIP("%s %s,%s\n", opname,
9107                         nameXMMReg(eregOfRexRM(pfx,rm)),
9108                         nameXMMReg(gregOfRexRM(pfx,rm)) );
9111       addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9113               binop( Iop_SetV128lo32,
9115                      loadLE(Ity_I32, mkexpr(addr)) ));
9116       putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9117       DIP("%s %s,%s\n", opname,
9119                         nameXMMReg(gregOfRexRM(pfx,rm)) );
9125 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */
9127 static ULong dis_SSE_E_to_G_unary_lo64 (
9128                 const VexAbiInfo* vbi,
9129                 Prefix pfx, Long delta,
9130                 const HChar* opname, IROp op
9133    /* First we need to get the old G value and patch the low 64 bits
9134       of the E operand into it.  Then apply op and write back to G. */
9138    UChar rm = getUChar(delta);
9139    IRTemp oldG0 = newTemp(Ity_V128);
9140    IRTemp oldG1 = newTemp(Ity_V128);
9142    assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
9144    if (epartIsReg(rm)) {
9146               binop( Iop_SetV128lo64,
9148                      getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) );
9149       putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9150       DIP("%s %s,%s\n", opname,
9151                         nameXMMReg(eregOfRexRM(pfx,rm)),
9152                         nameXMMReg(gregOfRexRM(pfx,rm)) );
9155       addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9157               binop( Iop_SetV128lo64,
9159                      loadLE(Ity_I64, mkexpr(addr)) ));
9160       putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9161       DIP("%s %s,%s\n", opname,
9163                         nameXMMReg(gregOfRexRM(pfx,rm)) );
9169 /* SSE integer binary operation:
9170 G = G `op` E (eLeft == False)
9171 G = E `op` G (eLeft == True)
9173 static ULong dis_SSEint_E_to_G(
9174                 const VexAbiInfo* vbi,
9175                 Prefix pfx, Long delta,
9176                 const HChar* opname, IROp op,
9183    UChar rm = getUChar(delta);
9184    IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
9185    IRExpr* epart = NULL;
9186    if (epartIsReg(rm)) {
9187       epart = getXMMReg(eregOfRexRM(pfx,rm));
9188       DIP("%s %s,%s\n", opname,
9189                         nameXMMReg(eregOfRexRM(pfx,rm)),
9190                         nameXMMReg(gregOfRexRM(pfx,rm)) );
9193       addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9194       epart = loadLE(Ity_V128, mkexpr(addr));
9195       DIP("%s %s,%s\n", opname,
9197                         nameXMMReg(gregOfRexRM(pfx,rm)) );
9200    putXMMReg( gregOfRexRM(pfx,rm),
9201               eLeft ? binop(op, epart, gpart)
9202                     : binop(op, gpart, epart) );
9207 /* Helper for doing SSE FP comparisons. False return ==> unhandled.
9208 This is all a bit of a kludge in that it ignores the subtleties of
9209 ordered-vs-unordered and signalling-vs-nonsignalling in the Intel
9211 static Bool findSSECmpOp ( /*OUT*/Bool* preSwapP,
9213                            /*OUT*/Bool* postNotP,
9214                            UInt imm8, Bool all_lanes, Int sz )
9216    if (imm8 >= 32) return False;
9218    /* First, compute a (preSwap, op, postNot) triple from
9219       the supplied imm8. */
9221    IROp op = Iop_INVALID;
9224 # define XXX(_pre, _op, _not) { pre = _pre; op = _op; not = _not; }
9225 // If you add a case here, add a corresponding test for both VCMPSD_128
9226 // and VCMPSS_128 in avx-1.c.
9227 // Cases 0xA and above are
9228 // "Enhanced Comparison Predicate[s] for VEX-Encoded [insns]"
9230 // "O" = ordered, "U" = unordered
9231 // "Q" = non-signalling (quiet), "S" = signalling
9235 // | cmp op invert after?
9238       case 0x0:  XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_OQ
9239       case 0x8:  XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_UQ
9240       case 0x10: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_OS
9241       case 0x18: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_US
9243       case 0x1:  XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OS
9244       case 0x11: XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OQ
9246       case 0x2:  XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OS
9247       case 0x12: XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OQ
9249       case 0x3:  XXX(False, Iop_CmpUN32Fx4, False); break; // UNORD_Q
9250       case 0x13: XXX(False, Iop_CmpUN32Fx4, False); break; // UNORD_S
9252       // 0xC: this isn't really right because it returns all-1s when
9253       // either operand is a NaN, and it should return all-0s.
9254       case 0x4:  XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_UQ
9255       case 0xC:  XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_OQ
9256       case 0x14: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_US
9257       case 0x1C: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_OS
9259       case 0x5:  XXX(False, Iop_CmpLT32Fx4, True); break; // NLT_US
9260       case 0x15: XXX(False, Iop_CmpLT32Fx4, True); break; // NLT_UQ
9262       case 0x6:  XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_US
9263       case 0x16: XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_UQ
9265       case 0x7:  XXX(False, Iop_CmpUN32Fx4, True); break; // ORD_Q
9266       case 0x17: XXX(False, Iop_CmpUN32Fx4, True); break; // ORD_S
9268       case 0x9:  XXX(True,  Iop_CmpLE32Fx4, True); break; // NGE_US
9269       case 0x19: XXX(True,  Iop_CmpLE32Fx4, True); break; // NGE_UQ
9271       case 0xA:  XXX(True,  Iop_CmpLT32Fx4, True); break; // NGT_US
9272       case 0x1A: XXX(True,  Iop_CmpLT32Fx4, True); break; // NGT_UQ
9274       case 0xD:  XXX(True,  Iop_CmpLE32Fx4, False); break; // GE_OS
9275       case 0x1D: XXX(True,  Iop_CmpLE32Fx4, False); break; // GE_OQ
9277       case 0xE:  XXX(True,  Iop_CmpLT32Fx4, False); break; // GT_OS
9278       case 0x1E: XXX(True,  Iop_CmpLT32Fx4, False); break; // GT_OQ
9284 /* Don't forget to add test cases to VCMPSS_128_<imm8> in
9285 avx-1.c if new cases turn up. */
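   // Worked example (added note, not part of the decode logic): imm8 0xD
   // (GE_OS) maps above to (preSwap=True, CmpLE, postNot=False), i.e.
   // "a >= b" is evaluated as "b <= a" by swapping the operands first,
   // while imm8 0x4 (NEQ_UQ) maps to (False, CmpEQ, True), i.e. compare
   // for equality and then invert every lane of the result.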
9289    if (op == Iop_INVALID) return False;
9291 /* Now convert the op into one with the same arithmetic but that is
9292 correct for the width and laneage requirements. */
9294    /**/ if (sz == 4 && all_lanes) {
9296          case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32Fx4; break;
9297          case Iop_CmpLT32Fx4: op = Iop_CmpLT32Fx4; break;
9298          case Iop_CmpLE32Fx4: op = Iop_CmpLE32Fx4; break;
9299          case Iop_CmpUN32Fx4: op = Iop_CmpUN32Fx4; break;
9300          default: vassert(0);
9303    else if (sz == 4 && !all_lanes) {
9305          case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32F0x4; break;
9306          case Iop_CmpLT32Fx4: op = Iop_CmpLT32F0x4; break;
9307          case Iop_CmpLE32Fx4: op = Iop_CmpLE32F0x4; break;
9308          case Iop_CmpUN32Fx4: op = Iop_CmpUN32F0x4; break;
9309          default: vassert(0);
9312    else if (sz == 8 && all_lanes) {
9314          case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64Fx2; break;
9315          case Iop_CmpLT32Fx4: op = Iop_CmpLT64Fx2; break;
9316          case Iop_CmpLE32Fx4: op = Iop_CmpLE64Fx2; break;
9317          case Iop_CmpUN32Fx4: op = Iop_CmpUN64Fx2; break;
9318          default: vassert(0);
9321    else if (sz == 8 && !all_lanes) {
9323          case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64F0x2; break;
9324          case Iop_CmpLT32Fx4: op = Iop_CmpLT64F0x2; break;
9325          case Iop_CmpLE32Fx4: op = Iop_CmpLE64F0x2; break;
9326          case Iop_CmpUN32Fx4: op = Iop_CmpUN64F0x2; break;
9327          default: vassert(0);
9331       vpanic("findSSECmpOp(amd64,guest)");
9334    *preSwapP = pre; *opP = op; *postNotP = not;
9339 /* Handles SSE 32F/64F comparisons. It can fail, in which case it
9340 returns the original delta to indicate failure. */
9342 static Long dis_SSE_cmp_E_to_G ( const VexAbiInfo* vbi,
9343                                  Prefix pfx, Long delta,
9344                                  const HChar* opname, Bool all_lanes, Int sz )
9346    Long   delta0  = delta;
9351    Bool   preSwap = False;
9352    IROp   op      = Iop_INVALID;
9353    Bool   postNot = False;
9354    IRTemp plain   = newTemp(Ity_V128);
9355    UChar  rm      = getUChar(delta);
9357    vassert(sz == 4 || sz == 8);
9358    if (epartIsReg(rm)) {
9359       imm8 = getUChar(delta+1);
9360       if (imm8 >= 8) return delta0; /* FAIL */
9361       Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
9362       if (!ok) return delta0; /* FAIL */
9363       vassert(!preSwap); /* never needed for imm8 < 8 */
9364       assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)),
9365                                getXMMReg(eregOfRexRM(pfx,rm))) );
9367       DIP("%s $%u,%s,%s\n", opname,
9369           nameXMMReg(eregOfRexRM(pfx,rm)),
9370           nameXMMReg(gregOfRexRM(pfx,rm)) );
9372       addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
9373       imm8 = getUChar(delta+alen);
9374       if (imm8 >= 8) return delta0; /* FAIL */
9375       Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
9376       if (!ok) return delta0; /* FAIL */
9377       vassert(!preSwap); /* never needed for imm8 < 8 */
9381                   getXMMReg(gregOfRexRM(pfx,rm)),
9383                      ? loadLE(Ity_V128, mkexpr(addr))
9385                         ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
9387                           unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
9391       DIP("%s $%u,%s,%s\n", opname,
9394           nameXMMReg(gregOfRexRM(pfx,rm)) );
9397    if (postNot && all_lanes) {
9398       putXMMReg( gregOfRexRM(pfx,rm),
9399                  unop(Iop_NotV128, mkexpr(plain)) );
9402    if (postNot && !all_lanes) {
9403       mask = toUShort(sz==4 ? 0x000F : 0x00FF);
9404       putXMMReg( gregOfRexRM(pfx,rm),
9405                  binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
9408       putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) );
9415 /* Vector by scalar shift of G by the amount specified at the bottom
9418 static ULong dis_SSE_shiftG_byE ( const VexAbiInfo* vbi,
9419                                   Prefix pfx, Long delta,
9420                                   const HChar* opname, IROp op )
9426    UChar  rm   = getUChar(delta);
9427    IRTemp g0   = newTemp(Ity_V128);
9428    IRTemp g1   = newTemp(Ity_V128);
9429    IRTemp amt  = newTemp(Ity_I64);
9430    IRTemp amt8 = newTemp(Ity_I8);
9431    if (epartIsReg(rm)) {
9432       assign( amt, getXMMRegLane64(eregOfRexRM(pfx,rm), 0) );
9433       DIP("%s %s,%s\n", opname,
9434           nameXMMReg(eregOfRexRM(pfx,rm)),
9435           nameXMMReg(gregOfRexRM(pfx,rm)) );
9438       addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9439       assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
9440       DIP("%s %s,%s\n", opname,
9442           nameXMMReg(gregOfRexRM(pfx,rm)) );
9445    assign( g0,   getXMMReg(gregOfRexRM(pfx,rm)) );
9446    assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
9448    shl = shr = sar = False;
9451       case Iop_ShlN16x8: shl = True; size = 32; break;
9452       case Iop_ShlN32x4: shl = True; size = 32; break;
9453       case Iop_ShlN64x2: shl = True; size = 64; break;
9454       case Iop_SarN16x8: sar = True; size = 16; break;
9455       case Iop_SarN32x4: sar = True; size = 32; break;
9456       case Iop_ShrN16x8: shr = True; size = 16; break;
9457       case Iop_ShrN32x4: shr = True; size = 32; break;
9458       case Iop_ShrN64x2: shr = True; size = 64; break;
9459       default: vassert(0);
9466            binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
9467            binop(op, mkexpr(g0), mkexpr(amt8)),
9476            binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
9477            binop(op, mkexpr(g0), mkexpr(amt8)),
9478            binop(op, mkexpr(g0), mkU8(size-1))
9485    putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) );
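   // Added note: the Iop_CmpLT64U guards above implement the x86
   // "shift count too large" rule -- when the 64-bit amount is >= the
   // lane width, logical shifts are assumed to produce all-zero lanes,
   // while arithmetic shifts fall back to shifting by (lane width - 1),
   // replicating each lane's sign bit.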
9490 /* Vector by scalar shift of E by an immediate byte. */
9493 ULong dis_SSE_shiftE_imm ( Prefix pfx,
9494                            Long delta, const HChar* opname, IROp op )
9497    UChar  rm = getUChar(delta);
9498    IRTemp e0 = newTemp(Ity_V128);
9499    IRTemp e1 = newTemp(Ity_V128);
9501    vassert(epartIsReg(rm));
9502    vassert(gregLO3ofRM(rm) == 2
9503            || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
9504    amt = getUChar(delta+1);
9506    DIP("%s $%d,%s\n", opname,
9508        nameXMMReg(eregOfRexRM(pfx,rm)) );
9509    assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
9511    shl = shr = sar = False;
9514       case Iop_ShlN16x8: shl = True; size = 16; break;
9515       case Iop_ShlN32x4: shl = True; size = 32; break;
9516       case Iop_ShlN64x2: shl = True; size = 64; break;
9517       case Iop_SarN16x8: sar = True; size = 16; break;
9518       case Iop_SarN32x4: sar = True; size = 32; break;
9519       case Iop_ShrN16x8: shr = True; size = 16; break;
9520       case Iop_ShrN32x4: shr = True; size = 32; break;
9521       case Iop_ShrN64x2: shr = True; size = 64; break;
9522       default: vassert(0);
9526       assign( e1, amt >= size
9528                      : binop(op, mkexpr(e0), mkU8(amt))
9532       assign( e1, amt >= size
9533                      ? binop(op, mkexpr(e0), mkU8(size-1))
9534                      : binop(op, mkexpr(e0), mkU8(amt))
9540    putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) );
9545 /* Get the current SSE rounding mode. */
9547 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
9552              IRExpr_Get( OFFB_SSEROUND, Ity_I64 ),
9556 static void put_sse_roundingmode ( IRExpr* sseround )
9558    vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
9559    stmt( IRStmt_Put( OFFB_SSEROUND,
9560                      unop(Iop_32Uto64, sseround) ) );
9563 /* Break a V128-bit value up into four 32-bit ints. */
9565 static void breakupV128to32s ( IRTemp t128,
9567                                IRTemp* t3, IRTemp* t2,
9568                                IRTemp* t1, IRTemp* t0 )
9570    IRTemp hi64 = newTemp(Ity_I64);
9571    IRTemp lo64 = newTemp(Ity_I64);
9572    assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
9573    assign( lo64, unop(Iop_V128to64,   mkexpr(t128)) );
9575    vassert(t0 && *t0 == IRTemp_INVALID);
9576    vassert(t1 && *t1 == IRTemp_INVALID);
9577    vassert(t2 && *t2 == IRTemp_INVALID);
9578    vassert(t3 && *t3 == IRTemp_INVALID);
9580    *t0 = newTemp(Ity_I32);
9581    *t1 = newTemp(Ity_I32);
9582    *t2 = newTemp(Ity_I32);
9583    *t3 = newTemp(Ity_I32);
9584    assign( *t0, unop(Iop_64to32,   mkexpr(lo64)) );
9585    assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
9586    assign( *t2, unop(Iop_64to32,   mkexpr(hi64)) );
9587    assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
9590 /* Construct a V128-bit value from four 32-bit ints. */
9592 static IRExpr* mkV128from32s ( IRTemp t3, IRTemp t2,
9593                                IRTemp t1, IRTemp t0 )
9596       binop( Iop_64HLtoV128,
9597              binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
9598              binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
9602 /* Break a 64-bit value up into four 16-bit ints. */
9604 static void breakup64to16s ( IRTemp t64,
9606                              IRTemp* t3, IRTemp* t2,
9607                              IRTemp* t1, IRTemp* t0 )
9609    IRTemp hi32 = newTemp(Ity_I32);
9610    IRTemp lo32 = newTemp(Ity_I32);
9611    assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
9612    assign( lo32, unop(Iop_64to32,   mkexpr(t64)) );
9614    vassert(t0 && *t0 == IRTemp_INVALID);
9615    vassert(t1 && *t1 == IRTemp_INVALID);
9616    vassert(t2 && *t2 == IRTemp_INVALID);
9617    vassert(t3 && *t3 == IRTemp_INVALID);
9619    *t0 = newTemp(Ity_I16);
9620    *t1 = newTemp(Ity_I16);
9621    *t2 = newTemp(Ity_I16);
9622    *t3 = newTemp(Ity_I16);
9623    assign( *t0, unop(Iop_32to16,   mkexpr(lo32)) );
9624    assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
9625    assign( *t2, unop(Iop_32to16,   mkexpr(hi32)) );
9626    assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
9629 /* Construct a 64-bit value from four 16-bit ints. */
9631 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
9632                              IRTemp t1, IRTemp t0 )
9635       binop( Iop_32HLto64,
9636              binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
9637              binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
9641 /* Break a V256-bit value up into four 64-bit ints. */
9643 static void breakupV256to64s ( IRTemp t256,
9645                                IRTemp* t3, IRTemp* t2,
9646                                IRTemp* t1, IRTemp* t0 )
9648    vassert(t0 && *t0 == IRTemp_INVALID);
9649    vassert(t1 && *t1 == IRTemp_INVALID);
9650    vassert(t2 && *t2 == IRTemp_INVALID);
9651    vassert(t3 && *t3 == IRTemp_INVALID);
9652    *t0 = newTemp(Ity_I64);
9653    *t1 = newTemp(Ity_I64);
9654    *t2 = newTemp(Ity_I64);
9655    *t3 = newTemp(Ity_I64);
9656    assign( *t0, unop(Iop_V256to64_0, mkexpr(t256)) );
9657    assign( *t1, unop(Iop_V256to64_1, mkexpr(t256)) );
9658    assign( *t2, unop(Iop_V256to64_2, mkexpr(t256)) );
9659    assign( *t3, unop(Iop_V256to64_3, mkexpr(t256)) );
9662 /* Break a V256-bit value up into two V128s. */
9664 static void breakupV256toV128s ( IRTemp t256,
9666                                  IRTemp* t1, IRTemp* t0 )
9668    vassert(t0 && *t0 == IRTemp_INVALID);
9669    vassert(t1 && *t1 == IRTemp_INVALID);
9670    *t0 = newTemp(Ity_V128);
9671    *t1 = newTemp(Ity_V128);
9672    assign(*t1, unop(Iop_V256toV128_1, mkexpr(t256)));
9673    assign(*t0, unop(Iop_V256toV128_0, mkexpr(t256)));
9676 /* Break a V256-bit value up into eight 32-bit ints. */
9678 static void breakupV256to32s ( IRTemp t256,
9680                                IRTemp* t7, IRTemp* t6,
9681                                IRTemp* t5, IRTemp* t4,
9682                                IRTemp* t3, IRTemp* t2,
9683                                IRTemp* t1, IRTemp* t0 )
9685    IRTemp t128_1 = IRTemp_INVALID;
9686    IRTemp t128_0 = IRTemp_INVALID;
9687    breakupV256toV128s( t256, &t128_1, &t128_0 );
9688    breakupV128to32s( t128_1, t7, t6, t5, t4 );
9689    breakupV128to32s( t128_0, t3, t2, t1, t0 );
9692 /* Break a V128-bit value up into two 64-bit ints. */
9694 static void breakupV128to64s ( IRTemp t128,
9696                                IRTemp* t1, IRTemp* t0 )
9698    vassert(t0 && *t0 == IRTemp_INVALID);
9699    vassert(t1 && *t1 == IRTemp_INVALID);
9700    *t0 = newTemp(Ity_I64);
9701    *t1 = newTemp(Ity_I64);
9702    assign( *t0, unop(Iop_V128to64,   mkexpr(t128)) );
9703    assign( *t1, unop(Iop_V128HIto64, mkexpr(t128)) );
9706 /* Construct a V256-bit value from eight 32-bit ints. */
9708 static IRExpr* mkV256from32s ( IRTemp t7, IRTemp t6,
9709                                IRTemp t5, IRTemp t4,
9710                                IRTemp t3, IRTemp t2,
9711                                IRTemp t1, IRTemp t0 )
9714       binop( Iop_V128HLtoV256,
9715              binop( Iop_64HLtoV128,
9716                     binop(Iop_32HLto64, mkexpr(t7), mkexpr(t6)),
9717                     binop(Iop_32HLto64, mkexpr(t5), mkexpr(t4)) ),
9718              binop( Iop_64HLtoV128,
9719                     binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
9720                     binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) )
9724 /* Construct a V256-bit value from four 64-bit ints. */
9726 static IRExpr* mkV256from64s ( IRTemp t3, IRTemp t2,
9727                                IRTemp t1, IRTemp t0 )
9730       binop( Iop_V128HLtoV256,
9731              binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)),
9732              binop(Iop_64HLtoV128, mkexpr(t1), mkexpr(t0))
9736 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
9737 values (aa,bb), computes, for each of the 4 16-bit lanes:
9739 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
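// Worked example (illustrative addition): treating the lanes as Q15
// fixed-point, aa_lane = bb_lane = 0x4000 (0.5) gives a 32-bit product
// of 0x10000000; ">>u 14" yields 0x4000, "+1" yields 0x4001, and the
// final ">>u 1" yields 0x2000, i.e. 0.25 -- the correctly rounded
// result of 0.5 * 0.5.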
9741 static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
9743    IRTemp aa      = newTemp(Ity_I64);
9744    IRTemp bb      = newTemp(Ity_I64);
9745    IRTemp aahi32s = newTemp(Ity_I64);
9746    IRTemp aalo32s = newTemp(Ity_I64);
9747    IRTemp bbhi32s = newTemp(Ity_I64);
9748    IRTemp bblo32s = newTemp(Ity_I64);
9749    IRTemp rHi     = newTemp(Ity_I64);
9750    IRTemp rLo     = newTemp(Ity_I64);
9751    IRTemp one32x2 = newTemp(Ity_I64);
9756              binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
9760              binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
9764              binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
9768              binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
9770    assign(one32x2, mkU64( (1ULL << 32) + 1 ));
9779                      binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
9795                      binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
9804           binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
9807 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
9808 values (aa,bb), computes, for each lane:
9810 if aa_lane < 0 then - bb_lane
9811 else if aa_lane > 0 then bb_lane
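// Illustrative example (added): for 16-bit lanes, aa_lane = -3 with
// bb_lane = 7 yields -7, aa_lane = 5 yields 7 unchanged, and aa_lane = 0
// yields 0, since neither of the masks built below is then set.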
9814 static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
9816    IRTemp aa       = newTemp(Ity_I64);
9817    IRTemp bb       = newTemp(Ity_I64);
9818    IRTemp zero     = newTemp(Ity_I64);
9819    IRTemp bbNeg    = newTemp(Ity_I64);
9820    IRTemp negMask  = newTemp(Ity_I64);
9821    IRTemp posMask  = newTemp(Ity_I64);
9822    IROp   opSub    = Iop_INVALID;
9823    IROp   opCmpGTS = Iop_INVALID;
9826       case 1: opSub = Iop_Sub8x8;  opCmpGTS = Iop_CmpGT8Sx8;  break;
9827       case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
9828       case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
9829       default: vassert(0);
9834    assign( zero,    mkU64(0) );
9835    assign( bbNeg,   binop(opSub,    mkexpr(zero), mkexpr(bb)) );
9836    assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
9837    assign( posMask, binop(opCmpGTS, mkexpr(aa),   mkexpr(zero)) );
9841              binop(Iop_And64, mkexpr(bb),    mkexpr(posMask)),
9842              binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
9847 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
9848 value aa, computes, for each lane
9850 if aa < 0 then -aa else aa
9852 Note that the result is interpreted as unsigned, so that the
9853 absolute value of the most negative signed input can be
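// Illustrative example (added): for byte lanes, an input of 0x80 (-128)
// negates back to 0x80, which read as an unsigned lane is 128 -- hence
// the remark above about interpreting the result as unsigned.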
9856 static IRTemp math_PABS_MMX ( IRTemp aa, Int laneszB )
9858    IRTemp res     = newTemp(Ity_I64);
9859    IRTemp zero    = newTemp(Ity_I64);
9860    IRTemp aaNeg   = newTemp(Ity_I64);
9861    IRTemp negMask = newTemp(Ity_I64);
9862    IRTemp posMask = newTemp(Ity_I64);
9863    IROp   opSub   = Iop_INVALID;
9864    IROp   opSarN  = Iop_INVALID;
9867       case 1: opSub = Iop_Sub8x8;  opSarN = Iop_SarN8x8;  break;
9868       case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
9869       case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
9870       default: vassert(0);
9873    assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
9874    assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
9875    assign( zero,    mkU64(0) );
9876    assign( aaNeg,   binop(opSub, mkexpr(zero), mkexpr(aa)) );
9879              binop(Iop_And64, mkexpr(aa),    mkexpr(posMask)),
9880              binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ));
9884 /* XMM version of math_PABS_MMX. */
9885 static IRTemp math_PABS_XMM ( IRTemp aa, Int laneszB )
9887    IRTemp res  = newTemp(Ity_V128);
9888    IRTemp aaHi = newTemp(Ity_I64);
9889    IRTemp aaLo = newTemp(Ity_I64);
9890    assign(aaHi, unop(Iop_V128HIto64, mkexpr(aa)));
9891    assign(aaLo, unop(Iop_V128to64, mkexpr(aa)));
9892    assign(res, binop(Iop_64HLtoV128,
9893                      mkexpr(math_PABS_MMX(aaHi, laneszB)),
9894                      mkexpr(math_PABS_MMX(aaLo, laneszB))));
9898 /* Specialisations of math_PABS_XMM, since there's no easy way to do
9899    partial applications in C :-( */
9900 static IRTemp math_PABS_XMM_pap4 ( IRTemp aa ) {
9901    return math_PABS_XMM(aa, 4);
9904 static IRTemp math_PABS_XMM_pap2 ( IRTemp aa ) {
9905    return math_PABS_XMM(aa, 2);
9908 static IRTemp math_PABS_XMM_pap1 ( IRTemp aa ) {
9909    return math_PABS_XMM(aa, 1);
9912 /* YMM version of math_PABS_XMM. */
9913 static IRTemp math_PABS_YMM ( IRTemp aa, Int laneszB )
9915    IRTemp res  = newTemp(Ity_V256);
9916    IRTemp aaHi = IRTemp_INVALID;
9917    IRTemp aaLo = IRTemp_INVALID;
9918    breakupV256toV128s(aa, &aaHi, &aaLo);
9919    assign(res, binop(Iop_V128HLtoV256,
9920                      mkexpr(math_PABS_XMM(aaHi, laneszB)),
9921                      mkexpr(math_PABS_XMM(aaLo, laneszB))));
9925 static IRTemp math_PABS_YMM_pap4 ( IRTemp aa ) {
9926    return math_PABS_YMM(aa, 4);
9929 static IRTemp math_PABS_YMM_pap2 ( IRTemp aa ) {
9930    return math_PABS_YMM(aa, 2);
9933 static IRTemp math_PABS_YMM_pap1 ( IRTemp aa ) {
9934    return math_PABS_YMM(aa, 1);
9937 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
9938                                         IRTemp lo64, Long byteShift )
9940    vassert(byteShift >= 1 && byteShift <= 7);
9943            binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
9944            binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
9948 static IRTemp math_PALIGNR_XMM ( IRTemp sV, IRTemp dV, UInt imm8 )
9950    IRTemp res = newTemp(Ity_V128);
9951    IRTemp sHi = newTemp(Ity_I64);
9952    IRTemp sLo = newTemp(Ity_I64);
9953    IRTemp dHi = newTemp(Ity_I64);
9954    IRTemp dLo = newTemp(Ity_I64);
9955    IRTemp rHi = newTemp(Ity_I64);
9956    IRTemp rLo = newTemp(Ity_I64);
9958    assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
9959    assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
9960    assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
9961    assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );
9964       assign( rHi, mkexpr(sHi) );
9965       assign( rLo, mkexpr(sLo) );
9967    else if (imm8 >= 1 && imm8 <= 7) {
9968       assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, imm8) );
9969       assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, imm8) );
9971    else if (imm8 == 8) {
9972       assign( rHi, mkexpr(dLo) );
9973       assign( rLo, mkexpr(sHi) );
9975    else if (imm8 >= 9 && imm8 <= 15) {
9976       assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-8) );
9977       assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, imm8-8) );
9979    else if (imm8 == 16) {
9980       assign( rHi, mkexpr(dHi) );
9981       assign( rLo, mkexpr(dLo) );
9983    else if (imm8 >= 17 && imm8 <= 23) {
9984       assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-16))) );
9985       assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-16) );
9987    else if (imm8 == 24) {
9988       assign( rHi, mkU64(0) );
9989       assign( rLo, mkexpr(dHi) );
9991    else if (imm8 >= 25 && imm8 <= 31) {
9992       assign( rHi, mkU64(0) );
9993       assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-24))) );
9995    else if (imm8 >= 32 && imm8 <= 255) {
9996       assign( rHi, mkU64(0) );
9997       assign( rLo, mkU64(0) );
10002    assign( res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
10007 /* Generate a SIGSEGV followed by a restart of the current instruction
10008 if effective_addr is not 16-aligned. This is required behaviour
10009 for some SSE3 instructions and all 128-bit SSSE3 instructions.
10010 This assumes that guest_RIP_curr_instr is set correctly! */
10012 void gen_SEGV_if_not_XX_aligned ( IRTemp effective_addr, ULong mask )
10017          binop(Iop_And64, mkexpr(effective_addr), mkU64(mask)),
10020          IRConst_U64(guest_RIP_curr_instr),
10026 static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) {
10027    gen_SEGV_if_not_XX_aligned(effective_addr, 16-1);
10030 static void gen_SEGV_if_not_32_aligned ( IRTemp effective_addr ) {
10031    gen_SEGV_if_not_XX_aligned(effective_addr, 32-1);
10034 static void gen_SEGV_if_not_64_aligned ( IRTemp effective_addr ) {
10035    gen_SEGV_if_not_XX_aligned(effective_addr, 64-1);
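// Sketch of a typical call site (added for clarity; the real call sites
// appear later in this file and may differ in detail):
//
//    addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
//    gen_SEGV_if_not_16_aligned( addr );
//    putXMMReg( gregOfRexRM(pfx,modrm), loadLE(Ity_V128, mkexpr(addr)) );
//
// The alignment check is planted before the access itself, so the
// SIGSEGV (and the subsequent restart of this instruction) happens
// before any guest state has been updated.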
10038 /* Helper for deciding whether a given insn (starting at the opcode
10039 byte) may validly be used with a LOCK prefix. The following insns
10040 may be used with LOCK when their destination operand is in memory.
10041 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
10043 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
10044 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
10045 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
10046 SBB 80 /3, 81 /3, 82 /x, 83 /3, 18, 19
10047 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
10048 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
10049 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
10059 BTC 0F BB, 0F BA /7
10060 BTR 0F B3, 0F BA /6
10061 BTS 0F AB, 0F BA /5
10063 CMPXCHG 0F B0, 0F B1
10068 ------------------------------
10070 80 /0 = addb $imm8, rm8
10071 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
10072 82 /0 = addb $imm8, rm8
10073 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
10076 01 = addl r32, rm32 and addw r16, rm16
10078 Same for ADD OR ADC SBB AND SUB XOR
10081 FF /1 = dec rm32 and dec rm16
10084 FF /0 = inc rm32 and inc rm16
10087 F7 /3 = neg rm32 and neg rm16
10090 F7 /2 = not rm32 and not rm16
10092 0F BB = btcw r16, rm16 and btcl r32, rm32
10093 0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32
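// Worked example (added): "lock addl $1, (%rax)" encodes as opcode 0x83
// with reg field /0 and a memory E operand, so the predicate below
// accepts the LOCK prefix; "lock addl $1, %ebx" has a register E operand
// and is rejected.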
10097 static Bool can_be_used_with_LOCK_prefix ( const UChar* opc )
10100       case 0x00: case 0x01: case 0x08: case 0x09:
10101       case 0x10: case 0x11: case 0x18: case 0x19:
10102       case 0x20: case 0x21: case 0x28: case 0x29:
10103       case 0x30: case 0x31:
10104          if (!epartIsReg(opc[1]))
10108       case 0x80: case 0x81: case 0x82: case 0x83:
10109          if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
10110              && !epartIsReg(opc[1]))
10114       case 0xFE: case 0xFF:
10115          if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
10116              && !epartIsReg(opc[1]))
10120       case 0xF6: case 0xF7:
10121          if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
10122              && !epartIsReg(opc[1]))
10126       case 0x86: case 0x87:
10127          if (!epartIsReg(opc[1]))
10133          case 0xBB: case 0xB3: case 0xAB:
10134             if (!epartIsReg(opc[2]))
10138             if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
10139                 && !epartIsReg(opc[2]))
10142          case 0xB0: case 0xB1:
10143             if (!epartIsReg(opc[2]))
10147             if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
10150          case 0xC0: case 0xC1:
10151             if (!epartIsReg(opc[2]))
10156 } /* switch (opc[1]) */
10162 } /* switch (opc[0]) */
10168 /*------------------------------------------------------------*/
10170 /*--- Top-level SSE/SSE2: dis_ESC_0F__SSE2 ---*/
10172 /*------------------------------------------------------------*/
10174 static Long dis_COMISD ( const VexAbiInfo* vbi, Prefix pfx,
10175                          Long delta, Bool isAvx, UChar opc )
10177    vassert(opc == 0x2F/*COMISD*/ || opc == 0x2E/*UCOMISD*/);
10180    IRTemp argL  = newTemp(Ity_F64);
10181    IRTemp argR  = newTemp(Ity_F64);
10182    UChar  modrm = getUChar(delta);
10183    IRTemp addr  = IRTemp_INVALID;
10184    if (epartIsReg(modrm)) {
10185       assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm),
10186                                       0/*lowest lane*/ ) );
10188       DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
10189           opc==0x2E ? "u" : "",
10190           nameXMMReg(eregOfRexRM(pfx,modrm)),
10191           nameXMMReg(gregOfRexRM(pfx,modrm)) );
10193       addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10194       assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
10196       DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
10197           opc==0x2E ? "u" : "",
10199           nameXMMReg(gregOfRexRM(pfx,modrm)) );
10201    assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm),
10202                                    0/*lowest lane*/ ) );
10204    stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
10205    stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
10210                    binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ),
10217 static Long dis_COMISS ( const VexAbiInfo* vbi, Prefix pfx,
10218                          Long delta, Bool isAvx, UChar opc )
10220    vassert(opc == 0x2F/*COMISS*/ || opc == 0x2E/*UCOMISS*/);
10223    IRTemp argL  = newTemp(Ity_F32);
10224    IRTemp argR  = newTemp(Ity_F32);
10225    UChar  modrm = getUChar(delta);
10226    IRTemp addr  = IRTemp_INVALID;
10227    if (epartIsReg(modrm)) {
10228       assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm),
10229                                       0/*lowest lane*/ ) );
10231       DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
10232           opc==0x2E ? "u" : "",
10233           nameXMMReg(eregOfRexRM(pfx,modrm)),
10234           nameXMMReg(gregOfRexRM(pfx,modrm)) );
10236       addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10237       assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
10239       DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
10240           opc==0x2E ? "u" : "",
10242           nameXMMReg(gregOfRexRM(pfx,modrm)) );
10244    assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm),
10245                                    0/*lowest lane*/ ) );
10247    stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
10248    stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
10254                       unop(Iop_F32toF64, mkexpr(argL)),
10255                       unop(Iop_F32toF64, mkexpr(argR)))),
10262 static Long
dis_PSHUFD_32x4 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10263 Long delta
, Bool writesYmm
)
10268 IRTemp sV
= newTemp(Ity_V128
);
10269 UChar modrm
= getUChar(delta
);
10270 const HChar
* strV
= writesYmm
? "v" : "";
10271 IRTemp addr
= IRTemp_INVALID
;
10272 if (epartIsReg(modrm
)) {
10273 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
10274 order
= (Int
)getUChar(delta
+1);
10276 DIP("%spshufd $%d,%s,%s\n", strV
, order
,
10277 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
10278 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
10280 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
,
10281 1/*byte after the amode*/ );
10282 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10283 order
= (Int
)getUChar(delta
+alen
);
10285 DIP("%spshufd $%d,%s,%s\n", strV
, order
,
10287 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
10290 IRTemp s3
, s2
, s1
, s0
;
10291 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
10292 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
10294 # define SEL(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
10295 IRTemp dV
= newTemp(Ity_V128
);
10297 mkV128from32s( SEL((order
>>6)&3), SEL((order
>>4)&3),
10298 SEL((order
>>2)&3), SEL((order
>>0)&3) )
10302 (writesYmm
? putYMMRegLoAndZU
: putXMMReg
)
10303 (gregOfRexRM(pfx
,modrm
), mkexpr(dV
));
10308 static Long
dis_PSHUFD_32x8 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
)
10313 IRTemp sV
= newTemp(Ity_V256
);
10314 UChar modrm
= getUChar(delta
);
10315 IRTemp addr
= IRTemp_INVALID
;
10316 UInt rG
= gregOfRexRM(pfx
,modrm
);
10317 if (epartIsReg(modrm
)) {
10318 UInt rE
= eregOfRexRM(pfx
,modrm
);
10319 assign( sV
, getYMMReg(rE
) );
10320 order
= (Int
)getUChar(delta
+1);
10322 DIP("vpshufd $%d,%s,%s\n", order
, nameYMMReg(rE
), nameYMMReg(rG
));
10324 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
,
10325 1/*byte after the amode*/ );
10326 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
10327 order
= (Int
)getUChar(delta
+alen
);
10329 DIP("vpshufd $%d,%s,%s\n", order
, dis_buf
, nameYMMReg(rG
));
10333 s
[7] = s
[6] = s
[5] = s
[4] = s
[3] = s
[2] = s
[1] = s
[0] = IRTemp_INVALID
;
10334 breakupV256to32s( sV
, &s
[7], &s
[6], &s
[5], &s
[4],
10335 &s
[3], &s
[2], &s
[1], &s
[0] );
10337 putYMMReg( rG
, mkV256from32s( s
[4 + ((order
>>6)&3)],
10338 s
[4 + ((order
>>4)&3)],
10339 s
[4 + ((order
>>2)&3)],
10340 s
[4 + ((order
>>0)&3)],
10341 s
[0 + ((order
>>6)&3)],
10342 s
[0 + ((order
>>4)&3)],
10343 s
[0 + ((order
>>2)&3)],
10344 s
[0 + ((order
>>0)&3)] ) );
10349 static IRTemp
math_PSRLDQ ( IRTemp sV
, Int imm
)
10351 IRTemp dV
= newTemp(Ity_V128
);
10352 IRTemp hi64
= newTemp(Ity_I64
);
10353 IRTemp lo64
= newTemp(Ity_I64
);
10354 IRTemp hi64r
= newTemp(Ity_I64
);
10355 IRTemp lo64r
= newTemp(Ity_I64
);
10357 vassert(imm
>= 0 && imm
<= 255);
10359 assign(dV
, mkV128(0x0000));
10363 assign( hi64
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
10364 assign( lo64
, unop(Iop_V128to64
, mkexpr(sV
)) );
10367 assign( lo64r
, mkexpr(lo64
) );
10368 assign( hi64r
, mkexpr(hi64
) );
10372 assign( hi64r
, mkU64(0) );
10373 assign( lo64r
, mkexpr(hi64
) );
10377 assign( hi64r
, mkU64(0) );
10378 assign( lo64r
, binop( Iop_Shr64
, mkexpr(hi64
), mkU8( 8*(imm
-8) ) ));
10380 assign( hi64r
, binop( Iop_Shr64
, mkexpr(hi64
), mkU8(8 * imm
) ));
10383 binop(Iop_Shr64
, mkexpr(lo64
),
10385 binop(Iop_Shl64
, mkexpr(hi64
),
10386 mkU8(8 * (8 - imm
)) )
10391 assign( dV
, binop(Iop_64HLtoV128
, mkexpr(hi64r
), mkexpr(lo64r
)) );
10396 static IRTemp
math_PSLLDQ ( IRTemp sV
, Int imm
)
10398 IRTemp dV
= newTemp(Ity_V128
);
10399 IRTemp hi64
= newTemp(Ity_I64
);
10400 IRTemp lo64
= newTemp(Ity_I64
);
10401 IRTemp hi64r
= newTemp(Ity_I64
);
10402 IRTemp lo64r
= newTemp(Ity_I64
);
10404 vassert(imm
>= 0 && imm
<= 255);
10406 assign(dV
, mkV128(0x0000));
10410 assign( hi64
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
10411 assign( lo64
, unop(Iop_V128to64
, mkexpr(sV
)) );
10414 assign( lo64r
, mkexpr(lo64
) );
10415 assign( hi64r
, mkexpr(hi64
) );
10419 assign( lo64r
, mkU64(0) );
10420 assign( hi64r
, mkexpr(lo64
) );
10424 assign( lo64r
, mkU64(0) );
10425 assign( hi64r
, binop( Iop_Shl64
, mkexpr(lo64
), mkU8( 8*(imm
-8) ) ));
10427 assign( lo64r
, binop( Iop_Shl64
, mkexpr(lo64
), mkU8(8 * imm
) ));
10430 binop(Iop_Shl64
, mkexpr(hi64
),
10432 binop(Iop_Shr64
, mkexpr(lo64
),
10433 mkU8(8 * (8 - imm
)) )
10438 assign( dV
, binop(Iop_64HLtoV128
, mkexpr(hi64r
), mkexpr(lo64r
)) );
10443 static Long
dis_CVTxSD2SI ( const VexAbiInfo
* vbi
, Prefix pfx
,
10444 Long delta
, Bool isAvx
, UChar opc
, Int sz
)
10446 vassert(opc
== 0x2D/*CVTSD2SI*/ || opc
== 0x2C/*CVTTSD2SI*/);
10449 UChar modrm
= getUChar(delta
);
10450 IRTemp addr
= IRTemp_INVALID
;
10451 IRTemp rmode
= newTemp(Ity_I32
);
10452 IRTemp f64lo
= newTemp(Ity_F64
);
10453 Bool r2zero
= toBool(opc
== 0x2C);
10455 if (epartIsReg(modrm
)) {
10457 assign(f64lo
, getXMMRegLane64F(eregOfRexRM(pfx
,modrm
), 0));
10458 DIP("%scvt%ssd2si %s,%s\n", isAvx
? "v" : "", r2zero
? "t" : "",
10459 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
10460 nameIReg(sz
, gregOfRexRM(pfx
,modrm
),
10463 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10464 assign(f64lo
, loadLE(Ity_F64
, mkexpr(addr
)));
10466 DIP("%scvt%ssd2si %s,%s\n", isAvx
? "v" : "", r2zero
? "t" : "",
10468 nameIReg(sz
, gregOfRexRM(pfx
,modrm
),
10473 assign( rmode
, mkU32((UInt
)Irrm_ZERO
) );
10475 assign( rmode
, get_sse_roundingmode() );
10479 putIReg32( gregOfRexRM(pfx
,modrm
),
10480 binop( Iop_F64toI32S
, mkexpr(rmode
), mkexpr(f64lo
)) );
10483 putIReg64( gregOfRexRM(pfx
,modrm
),
10484 binop( Iop_F64toI64S
, mkexpr(rmode
), mkexpr(f64lo
)) );
10491 static Long
dis_CVTxSS2SI ( const VexAbiInfo
* vbi
, Prefix pfx
,
10492 Long delta
, Bool isAvx
, UChar opc
, Int sz
)
10494 vassert(opc
== 0x2D/*CVTSS2SI*/ || opc
== 0x2C/*CVTTSS2SI*/);
10497 UChar modrm
= getUChar(delta
);
10498 IRTemp addr
= IRTemp_INVALID
;
10499 IRTemp rmode
= newTemp(Ity_I32
);
10500 IRTemp f32lo
= newTemp(Ity_F32
);
10501 Bool r2zero
= toBool(opc
== 0x2C);
10503 if (epartIsReg(modrm
)) {
10505 assign(f32lo
, getXMMRegLane32F(eregOfRexRM(pfx
,modrm
), 0));
10506 DIP("%scvt%sss2si %s,%s\n", isAvx
? "v" : "", r2zero
? "t" : "",
10507 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
10508 nameIReg(sz
, gregOfRexRM(pfx
,modrm
),
10511 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10512 assign(f32lo
, loadLE(Ity_F32
, mkexpr(addr
)));
10514 DIP("%scvt%sss2si %s,%s\n", isAvx
? "v" : "", r2zero
? "t" : "",
10516 nameIReg(sz
, gregOfRexRM(pfx
,modrm
),
10521 assign( rmode
, mkU32((UInt
)Irrm_ZERO
) );
10523 assign( rmode
, get_sse_roundingmode() );
10527 putIReg32( gregOfRexRM(pfx
,modrm
),
10528 binop( Iop_F64toI32S
,
10530 unop(Iop_F32toF64
, mkexpr(f32lo
))) );
10533 putIReg64( gregOfRexRM(pfx
,modrm
),
10534 binop( Iop_F64toI64S
,
10536 unop(Iop_F32toF64
, mkexpr(f32lo
))) );
10543 static Long
dis_CVTPS2PD_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10544 Long delta
, Bool isAvx
)
10546 IRTemp addr
= IRTemp_INVALID
;
10549 IRTemp f32lo
= newTemp(Ity_F32
);
10550 IRTemp f32hi
= newTemp(Ity_F32
);
10551 UChar modrm
= getUChar(delta
);
10552 UInt rG
= gregOfRexRM(pfx
,modrm
);
10553 if (epartIsReg(modrm
)) {
10554 UInt rE
= eregOfRexRM(pfx
,modrm
);
10555 assign( f32lo
, getXMMRegLane32F(rE
, 0) );
10556 assign( f32hi
, getXMMRegLane32F(rE
, 1) );
10558 DIP("%scvtps2pd %s,%s\n",
10559 isAvx
? "v" : "", nameXMMReg(rE
), nameXMMReg(rG
));
10561 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10562 assign( f32lo
, loadLE(Ity_F32
, mkexpr(addr
)) );
10563 assign( f32hi
, loadLE(Ity_F32
,
10564 binop(Iop_Add64
,mkexpr(addr
),mkU64(4))) );
10566 DIP("%scvtps2pd %s,%s\n",
10567 isAvx
? "v" : "", dis_buf
, nameXMMReg(rG
));
10570 putXMMRegLane64F( rG
, 1, unop(Iop_F32toF64
, mkexpr(f32hi
)) );
10571 putXMMRegLane64F( rG
, 0, unop(Iop_F32toF64
, mkexpr(f32lo
)) );
10573 putYMMRegLane128( rG
, 1, mkV128(0));
10578 static Long
dis_CVTPS2PD_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10581 IRTemp addr
= IRTemp_INVALID
;
10584 IRTemp f32_0
= newTemp(Ity_F32
);
10585 IRTemp f32_1
= newTemp(Ity_F32
);
10586 IRTemp f32_2
= newTemp(Ity_F32
);
10587 IRTemp f32_3
= newTemp(Ity_F32
);
10588 UChar modrm
= getUChar(delta
);
10589 UInt rG
= gregOfRexRM(pfx
,modrm
);
10590 if (epartIsReg(modrm
)) {
10591 UInt rE
= eregOfRexRM(pfx
,modrm
);
10592 assign( f32_0
, getXMMRegLane32F(rE
, 0) );
10593 assign( f32_1
, getXMMRegLane32F(rE
, 1) );
10594 assign( f32_2
, getXMMRegLane32F(rE
, 2) );
10595 assign( f32_3
, getXMMRegLane32F(rE
, 3) );
10597 DIP("vcvtps2pd %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
10599 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10600 assign( f32_0
, loadLE(Ity_F32
, mkexpr(addr
)) );
10601 assign( f32_1
, loadLE(Ity_F32
,
10602 binop(Iop_Add64
,mkexpr(addr
),mkU64(4))) );
10603 assign( f32_2
, loadLE(Ity_F32
,
10604 binop(Iop_Add64
,mkexpr(addr
),mkU64(8))) );
10605 assign( f32_3
, loadLE(Ity_F32
,
10606 binop(Iop_Add64
,mkexpr(addr
),mkU64(12))) );
10608 DIP("vcvtps2pd %s,%s\n", dis_buf
, nameYMMReg(rG
));
10611 putYMMRegLane64F( rG
, 3, unop(Iop_F32toF64
, mkexpr(f32_3
)) );
10612 putYMMRegLane64F( rG
, 2, unop(Iop_F32toF64
, mkexpr(f32_2
)) );
10613 putYMMRegLane64F( rG
, 1, unop(Iop_F32toF64
, mkexpr(f32_1
)) );
10614 putYMMRegLane64F( rG
, 0, unop(Iop_F32toF64
, mkexpr(f32_0
)) );
10619 static Long
dis_CVTPD2PS_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10620 Long delta
, Bool isAvx
)
10622 IRTemp addr
= IRTemp_INVALID
;
10625 UChar modrm
= getUChar(delta
);
10626 UInt rG
= gregOfRexRM(pfx
,modrm
);
10627 IRTemp argV
= newTemp(Ity_V128
);
10628 IRTemp rmode
= newTemp(Ity_I32
);
10629 if (epartIsReg(modrm
)) {
10630 UInt rE
= eregOfRexRM(pfx
,modrm
);
10631 assign( argV
, getXMMReg(rE
) );
10633 DIP("%scvtpd2ps %s,%s\n", isAvx
? "v" : "",
10634 nameXMMReg(rE
), nameXMMReg(rG
));
10636 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10637 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10639 DIP("%scvtpd2ps %s,%s\n", isAvx
? "v" : "",
10640 dis_buf
, nameXMMReg(rG
) );
10643 assign( rmode
, get_sse_roundingmode() );
10644 IRTemp t0
= newTemp(Ity_F64
);
10645 IRTemp t1
= newTemp(Ity_F64
);
10646 assign( t0
, unop(Iop_ReinterpI64asF64
,
10647 unop(Iop_V128to64
, mkexpr(argV
))) );
10648 assign( t1
, unop(Iop_ReinterpI64asF64
,
10649 unop(Iop_V128HIto64
, mkexpr(argV
))) );
10651 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), mkexpr(_t) )
10652 putXMMRegLane32( rG
, 3, mkU32(0) );
10653 putXMMRegLane32( rG
, 2, mkU32(0) );
10654 putXMMRegLane32F( rG
, 1, CVT(t1
) );
10655 putXMMRegLane32F( rG
, 0, CVT(t0
) );
10658 putYMMRegLane128( rG
, 1, mkV128(0) );
10664 static Long
dis_CVTxPS2DQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10665 Long delta
, Bool isAvx
, Bool r2zero
)
10667 IRTemp addr
= IRTemp_INVALID
;
10670 UChar modrm
= getUChar(delta
);
10671 IRTemp argV
= newTemp(Ity_V128
);
10672 IRTemp rmode
= newTemp(Ity_I32
);
10673 UInt rG
= gregOfRexRM(pfx
,modrm
);
10674 IRTemp t0
, t1
, t2
, t3
;
10676 if (epartIsReg(modrm
)) {
10677 UInt rE
= eregOfRexRM(pfx
,modrm
);
10678 assign( argV
, getXMMReg(rE
) );
10680 DIP("%scvt%sps2dq %s,%s\n",
10681 isAvx
? "v" : "", r2zero
? "t" : "", nameXMMReg(rE
), nameXMMReg(rG
));
10683 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10684 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10686 DIP("%scvt%sps2dq %s,%s\n",
10687 isAvx
? "v" : "", r2zero
? "t" : "", dis_buf
, nameXMMReg(rG
) );
10690 assign( rmode
, r2zero
? mkU32((UInt
)Irrm_ZERO
)
10691 : get_sse_roundingmode() );
10692 t0
= t1
= t2
= t3
= IRTemp_INVALID
;
10693 breakupV128to32s( argV
, &t3
, &t2
, &t1
, &t0
);
10694 /* This is less than ideal. If it turns out to be a performance
10695 bottleneck it can be improved. */
10697 binop( Iop_F64toI32S, \
10699 unop( Iop_F32toF64, \
10700 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
10702 putXMMRegLane32( rG
, 3, CVT(t3
) );
10703 putXMMRegLane32( rG
, 2, CVT(t2
) );
10704 putXMMRegLane32( rG
, 1, CVT(t1
) );
10705 putXMMRegLane32( rG
, 0, CVT(t0
) );
10708 putYMMRegLane128( rG
, 1, mkV128(0) );
10714 static Long
dis_CVTxPS2DQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10715 Long delta
, Bool r2zero
)
10717 IRTemp addr
= IRTemp_INVALID
;
10720 UChar modrm
= getUChar(delta
);
10721 IRTemp argV
= newTemp(Ity_V256
);
10722 IRTemp rmode
= newTemp(Ity_I32
);
10723 UInt rG
= gregOfRexRM(pfx
,modrm
);
10724 IRTemp t0
, t1
, t2
, t3
, t4
, t5
, t6
, t7
;
10726 if (epartIsReg(modrm
)) {
10727 UInt rE
= eregOfRexRM(pfx
,modrm
);
10728 assign( argV
, getYMMReg(rE
) );
10730 DIP("vcvt%sps2dq %s,%s\n",
10731 r2zero
? "t" : "", nameYMMReg(rE
), nameYMMReg(rG
));
10733 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10734 assign( argV
, loadLE(Ity_V256
, mkexpr(addr
)) );
10736 DIP("vcvt%sps2dq %s,%s\n",
10737 r2zero
? "t" : "", dis_buf
, nameYMMReg(rG
) );
10740 assign( rmode
, r2zero
? mkU32((UInt
)Irrm_ZERO
)
10741 : get_sse_roundingmode() );
10742 t0
= t1
= t2
= t3
= t4
= t5
= t6
= t7
= IRTemp_INVALID
;
10743 breakupV256to32s( argV
, &t7
, &t6
, &t5
, &t4
, &t3
, &t2
, &t1
, &t0
);
10744 /* This is less than ideal. If it turns out to be a performance
10745 bottleneck it can be improved. */
10747 binop( Iop_F64toI32S, \
10749 unop( Iop_F32toF64, \
10750 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
10752 putYMMRegLane32( rG
, 7, CVT(t7
) );
10753 putYMMRegLane32( rG
, 6, CVT(t6
) );
10754 putYMMRegLane32( rG
, 5, CVT(t5
) );
10755 putYMMRegLane32( rG
, 4, CVT(t4
) );
10756 putYMMRegLane32( rG
, 3, CVT(t3
) );
10757 putYMMRegLane32( rG
, 2, CVT(t2
) );
10758 putYMMRegLane32( rG
, 1, CVT(t1
) );
10759 putYMMRegLane32( rG
, 0, CVT(t0
) );
10766 static Long
dis_CVTxPD2DQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10767 Long delta
, Bool isAvx
, Bool r2zero
)
10769 IRTemp addr
= IRTemp_INVALID
;
10772 UChar modrm
= getUChar(delta
);
10773 IRTemp argV
= newTemp(Ity_V128
);
10774 IRTemp rmode
= newTemp(Ity_I32
);
10775 UInt rG
= gregOfRexRM(pfx
,modrm
);
10778 if (epartIsReg(modrm
)) {
10779 UInt rE
= eregOfRexRM(pfx
,modrm
);
10780 assign( argV
, getXMMReg(rE
) );
10782 DIP("%scvt%spd2dq %s,%s\n",
10783 isAvx
? "v" : "", r2zero
? "t" : "", nameXMMReg(rE
), nameXMMReg(rG
));
10785 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10786 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10788 DIP("%scvt%spd2dqx %s,%s\n",
10789 isAvx
? "v" : "", r2zero
? "t" : "", dis_buf
, nameXMMReg(rG
) );
10793 assign(rmode
, mkU32((UInt
)Irrm_ZERO
) );
10795 assign( rmode
, get_sse_roundingmode() );
10798 t0
= newTemp(Ity_F64
);
10799 t1
= newTemp(Ity_F64
);
10800 assign( t0
, unop(Iop_ReinterpI64asF64
,
10801 unop(Iop_V128to64
, mkexpr(argV
))) );
10802 assign( t1
, unop(Iop_ReinterpI64asF64
,
10803 unop(Iop_V128HIto64
, mkexpr(argV
))) );
10805 # define CVT(_t) binop( Iop_F64toI32S, \
10809 putXMMRegLane32( rG
, 3, mkU32(0) );
10810 putXMMRegLane32( rG
, 2, mkU32(0) );
10811 putXMMRegLane32( rG
, 1, CVT(t1
) );
10812 putXMMRegLane32( rG
, 0, CVT(t0
) );
10815 putYMMRegLane128( rG
, 1, mkV128(0) );
10821 static Long
dis_CVTxPD2DQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10822 Long delta
, Bool r2zero
)
10824 IRTemp addr
= IRTemp_INVALID
;
10827 UChar modrm
= getUChar(delta
);
10828 IRTemp argV
= newTemp(Ity_V256
);
10829 IRTemp rmode
= newTemp(Ity_I32
);
10830 UInt rG
= gregOfRexRM(pfx
,modrm
);
10831 IRTemp t0
, t1
, t2
, t3
;
10833 if (epartIsReg(modrm
)) {
10834 UInt rE
= eregOfRexRM(pfx
,modrm
);
10835 assign( argV
, getYMMReg(rE
) );
10837 DIP("vcvt%spd2dq %s,%s\n",
10838 r2zero
? "t" : "", nameYMMReg(rE
), nameXMMReg(rG
));
10840 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10841 assign( argV
, loadLE(Ity_V256
, mkexpr(addr
)) );
10843 DIP("vcvt%spd2dqy %s,%s\n",
10844 r2zero
? "t" : "", dis_buf
, nameXMMReg(rG
) );
10848 assign(rmode
, mkU32((UInt
)Irrm_ZERO
) );
10850 assign( rmode
, get_sse_roundingmode() );
10853 t0
= IRTemp_INVALID
;
10854 t1
= IRTemp_INVALID
;
10855 t2
= IRTemp_INVALID
;
10856 t3
= IRTemp_INVALID
;
10857 breakupV256to64s( argV
, &t3
, &t2
, &t1
, &t0
);
10859 # define CVT(_t) binop( Iop_F64toI32S, \
10861 unop( Iop_ReinterpI64asF64, \
10864 putXMMRegLane32( rG
, 3, CVT(t3
) );
10865 putXMMRegLane32( rG
, 2, CVT(t2
) );
10866 putXMMRegLane32( rG
, 1, CVT(t1
) );
10867 putXMMRegLane32( rG
, 0, CVT(t0
) );
10869 putYMMRegLane128( rG
, 1, mkV128(0) );
10875 static Long
dis_CVTDQ2PS_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10876 Long delta
, Bool isAvx
)
10878 IRTemp addr
= IRTemp_INVALID
;
10881 UChar modrm
= getUChar(delta
);
10882 IRTemp argV
= newTemp(Ity_V128
);
10883 IRTemp rmode
= newTemp(Ity_I32
);
10884 UInt rG
= gregOfRexRM(pfx
,modrm
);
10885 IRTemp t0
, t1
, t2
, t3
;
10887 if (epartIsReg(modrm
)) {
10888 UInt rE
= eregOfRexRM(pfx
,modrm
);
10889 assign( argV
, getXMMReg(rE
) );
10891 DIP("%scvtdq2ps %s,%s\n",
10892 isAvx
? "v" : "", nameXMMReg(rE
), nameXMMReg(rG
));
10894 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10895 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10897 DIP("%scvtdq2ps %s,%s\n",
10898 isAvx
? "v" : "", dis_buf
, nameXMMReg(rG
) );
10901 assign( rmode
, get_sse_roundingmode() );
10902 t0
= IRTemp_INVALID
;
10903 t1
= IRTemp_INVALID
;
10904 t2
= IRTemp_INVALID
;
10905 t3
= IRTemp_INVALID
;
10906 breakupV128to32s( argV
, &t3
, &t2
, &t1
, &t0
);
10908 # define CVT(_t) binop( Iop_F64toF32, \
10910 unop(Iop_I32StoF64,mkexpr(_t)))
10912 putXMMRegLane32F( rG
, 3, CVT(t3
) );
10913 putXMMRegLane32F( rG
, 2, CVT(t2
) );
10914 putXMMRegLane32F( rG
, 1, CVT(t1
) );
10915 putXMMRegLane32F( rG
, 0, CVT(t0
) );
10918 putYMMRegLane128( rG
, 1, mkV128(0) );
10923 static Long
dis_CVTDQ2PS_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10926 IRTemp addr
= IRTemp_INVALID
;
10929 UChar modrm
= getUChar(delta
);
10930 IRTemp argV
= newTemp(Ity_V256
);
10931 IRTemp rmode
= newTemp(Ity_I32
);
10932 UInt rG
= gregOfRexRM(pfx
,modrm
);
10933 IRTemp t0
, t1
, t2
, t3
, t4
, t5
, t6
, t7
;
10935 if (epartIsReg(modrm
)) {
10936 UInt rE
= eregOfRexRM(pfx
,modrm
);
10937 assign( argV
, getYMMReg(rE
) );
10939 DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE
), nameYMMReg(rG
));
10941 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10942 assign( argV
, loadLE(Ity_V256
, mkexpr(addr
)) );
10944 DIP("vcvtdq2ps %s,%s\n", dis_buf
, nameYMMReg(rG
) );
10947 assign( rmode
, get_sse_roundingmode() );
10948 t0
= IRTemp_INVALID
;
10949 t1
= IRTemp_INVALID
;
10950 t2
= IRTemp_INVALID
;
10951 t3
= IRTemp_INVALID
;
10952 t4
= IRTemp_INVALID
;
10953 t5
= IRTemp_INVALID
;
10954 t6
= IRTemp_INVALID
;
10955 t7
= IRTemp_INVALID
;
10956 breakupV256to32s( argV
, &t7
, &t6
, &t5
, &t4
, &t3
, &t2
, &t1
, &t0
);
10958 # define CVT(_t) binop( Iop_F64toF32, \
10960 unop(Iop_I32StoF64,mkexpr(_t)))
10962 putYMMRegLane32F( rG
, 7, CVT(t7
) );
10963 putYMMRegLane32F( rG
, 6, CVT(t6
) );
10964 putYMMRegLane32F( rG
, 5, CVT(t5
) );
10965 putYMMRegLane32F( rG
, 4, CVT(t4
) );
10966 putYMMRegLane32F( rG
, 3, CVT(t3
) );
10967 putYMMRegLane32F( rG
, 2, CVT(t2
) );
10968 putYMMRegLane32F( rG
, 1, CVT(t1
) );
10969 putYMMRegLane32F( rG
, 0, CVT(t0
) );
10976 static Long
dis_PMOVMSKB_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10977 Long delta
, Bool isAvx
)
10979 UChar modrm
= getUChar(delta
);
10980 vassert(epartIsReg(modrm
)); /* ensured by caller */
10981 UInt rE
= eregOfRexRM(pfx
,modrm
);
10982 UInt rG
= gregOfRexRM(pfx
,modrm
);
10983 IRTemp t0
= newTemp(Ity_V128
);
10984 IRTemp t1
= newTemp(Ity_I32
);
10985 assign(t0
, getXMMReg(rE
));
10986 assign(t1
, unop(Iop_16Uto32
, unop(Iop_GetMSBs8x16
, mkexpr(t0
))));
10987 putIReg32(rG
, mkexpr(t1
));
10988 DIP("%spmovmskb %s,%s\n", isAvx
? "v" : "", nameXMMReg(rE
),
10995 static Long
dis_PMOVMSKB_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10998 UChar modrm
= getUChar(delta
);
10999 vassert(epartIsReg(modrm
)); /* ensured by caller */
11000 UInt rE
= eregOfRexRM(pfx
,modrm
);
11001 UInt rG
= gregOfRexRM(pfx
,modrm
);
11002 IRTemp t0
= newTemp(Ity_V128
);
11003 IRTemp t1
= newTemp(Ity_V128
);
11004 IRTemp t2
= newTemp(Ity_I16
);
11005 IRTemp t3
= newTemp(Ity_I16
);
11006 assign(t0
, getYMMRegLane128(rE
, 0));
11007 assign(t1
, getYMMRegLane128(rE
, 1));
11008 assign(t2
, unop(Iop_GetMSBs8x16
, mkexpr(t0
)));
11009 assign(t3
, unop(Iop_GetMSBs8x16
, mkexpr(t1
)));
11010 putIReg32(rG
, binop(Iop_16HLto32
, mkexpr(t3
), mkexpr(t2
)));
11011 DIP("vpmovmskb %s,%s\n", nameYMMReg(rE
), nameIReg32(rG
));
11017 /* FIXME: why not just use InterleaveLO / InterleaveHI? I think the
11018 relevant ops are "xIsH ? InterleaveHI32x4 : InterleaveLO32x4". */
11019 /* Does the maths for 128 bit versions of UNPCKLPS and UNPCKHPS */
11020 static IRTemp
math_UNPCKxPS_128 ( IRTemp sV
, IRTemp dV
, Bool xIsH
)
11022 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
11023 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
11024 breakupV128to32s( dV
, &d3
, &d2
, &d1
, &d0
);
11025 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
11026 IRTemp res
= newTemp(Ity_V128
);
11027 assign(res
, xIsH
? mkV128from32s( s3
, d3
, s2
, d2
)
11028 : mkV128from32s( s1
, d1
, s0
, d0
));
11033 /* FIXME: why not just use InterleaveLO / InterleaveHI ?? */
11034 /* Does the maths for 128 bit versions of UNPCKLPD and UNPCKHPD */
11035 static IRTemp
math_UNPCKxPD_128 ( IRTemp sV
, IRTemp dV
, Bool xIsH
)
11037 IRTemp s1
= newTemp(Ity_I64
);
11038 IRTemp s0
= newTemp(Ity_I64
);
11039 IRTemp d1
= newTemp(Ity_I64
);
11040 IRTemp d0
= newTemp(Ity_I64
);
11041 assign( d1
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
11042 assign( d0
, unop(Iop_V128to64
, mkexpr(dV
)) );
11043 assign( s1
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
11044 assign( s0
, unop(Iop_V128to64
, mkexpr(sV
)) );
11045 IRTemp res
= newTemp(Ity_V128
);
11046 assign(res
, xIsH
? binop(Iop_64HLtoV128
, mkexpr(s1
), mkexpr(d1
))
11047 : binop(Iop_64HLtoV128
, mkexpr(s0
), mkexpr(d0
)));
11052 /* Does the maths for 256 bit versions of UNPCKLPD and UNPCKHPD.
11053 Doesn't seem like this fits in either of the Iop_Interleave{LO,HI}
11054 or the Iop_Cat{Odd,Even}Lanes idioms, hence just do it the stupid
11056 static IRTemp
math_UNPCKxPD_256 ( IRTemp sV
, IRTemp dV
, Bool xIsH
)
11058 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
11059 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
11060 breakupV256to64s( dV
, &d3
, &d2
, &d1
, &d0
);
11061 breakupV256to64s( sV
, &s3
, &s2
, &s1
, &s0
);
11062 IRTemp res
= newTemp(Ity_V256
);
11064 ? IRExpr_Qop(Iop_64x4toV256
, mkexpr(s3
), mkexpr(d3
),
11065 mkexpr(s1
), mkexpr(d1
))
11066 : IRExpr_Qop(Iop_64x4toV256
, mkexpr(s2
), mkexpr(d2
),
11067 mkexpr(s0
), mkexpr(d0
)));
/* FIXME: this is really bad.  Surely can do something better here?
   One observation is that the steering in the upper and lower 128 bit
   halves is the same as with math_UNPCKxPS_128, so we simply split
   into two halves, and use that.  Consequently any improvement in
   math_UNPCKxPS_128 (probably, to use interleave-style primops)
   benefits this too. */
static IRTemp math_UNPCKxPS_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_UNPCKxPS_128(sVhi, dVhi, xIsH);
   IRTemp rVlo = math_UNPCKxPS_128(sVlo, dVlo, xIsH);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}

static IRTemp math_SHUFPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   vassert(imm8 < 256);

   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );

#  define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
#  define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
   IRTemp res = newTemp(Ity_V128);
   assign(res,
          mkV128from32s( SELS((imm8>>6)&3), SELS((imm8>>4)&3),
                         SELD((imm8>>2)&3), SELD((imm8>>0)&3) ) );
#  undef SELD
#  undef SELS
   return res;
}
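
/* Worked example, for illustration only: with imm8 == 0x1B the fields
   are (imm8>>6)&3 == 0, (imm8>>4)&3 == 1, (imm8>>2)&3 == 2 and
   (imm8>>0)&3 == 3, so the result lanes, from most to least
   significant, are s0, s1, d2, d3 -- the two high lanes come from the
   source and the two low lanes from the destination, as SHUFPS
   requires. */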
/* 256-bit SHUFPS appears to steer each of the 128-bit halves
   identically.  Hence do the clueless thing and use math_SHUFPS_128
   for both halves. */
static IRTemp math_SHUFPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_SHUFPS_128(sVhi, dVhi, imm8);
   IRTemp rVlo = math_SHUFPS_128(sVlo, dVlo, imm8);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}

static IRTemp math_SHUFPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp s1 = newTemp(Ity_I64);
   IRTemp s0 = newTemp(Ity_I64);
   IRTemp d1 = newTemp(Ity_I64);
   IRTemp d0 = newTemp(Ity_I64);

   assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( d0, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( s0, unop(Iop_V128to64,   mkexpr(sV)) );

#  define SELD(n) mkexpr((n)==0 ? d0 : d1)
#  define SELS(n) mkexpr((n)==0 ? s0 : s1)

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop( Iop_64HLtoV128,
                      SELS((imm8>>1)&1), SELD((imm8>>0)&1) ) );

#  undef SELD
#  undef SELS
   return res;
}
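
/* Worked example, for illustration only: if imm8 == 2 (binary 10),
   then (imm8>>1)&1 == 1 and (imm8>>0)&1 == 0, so the result carries
   s1 in its upper 64 bits and d0 in its lower 64 bits. */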
static IRTemp math_SHUFPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_SHUFPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
   IRTemp rVlo = math_SHUFPD_128(sVlo, dVlo, imm8 & 3);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}

static IRTemp math_BLENDPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   UShort imm8_mask_16;
   IRTemp imm8_mask = newTemp(Ity_V128);

   switch( imm8 & 3 ) {
      case 0:  imm8_mask_16 = 0x0000; break;
      case 1:  imm8_mask_16 = 0x00FF; break;
      case 2:  imm8_mask_16 = 0xFF00; break;
      case 3:  imm8_mask_16 = 0xFFFF; break;
      default: vassert(0);            break;
   }
   assign( imm8_mask, mkV128( imm8_mask_16 ) );

   IRTemp res = newTemp(Ity_V128);
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm8_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
   return res;
}

static IRTemp math_BLENDPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_BLENDPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
   IRTemp rVlo = math_BLENDPD_128(sVlo, dVlo, imm8 & 3);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}

static IRTemp math_BLENDPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
                             0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
                             0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
                             0xFFFF };
   IRTemp imm8_mask = newTemp(Ity_V128);
   assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) );

   IRTemp res = newTemp(Ity_V128);
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm8_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
   return res;
}

static IRTemp math_BLENDPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
   IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
   breakupV256toV128s( sV, &sVhi, &sVlo );
   breakupV256toV128s( dV, &dVhi, &dVlo );
   IRTemp rVhi = math_BLENDPS_128(sVhi, dVhi, (imm8 >> 4) & 15);
   IRTemp rVlo = math_BLENDPS_128(sVlo, dVlo, imm8 & 15);
   IRTemp rV   = newTemp(Ity_V256);
   assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
   return rV;
}

static IRTemp math_PBLENDW_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
{
   /* Make w be a 16-bit version of imm8, formed by duplicating each
      imm8 bit. */
   Int i;
   UShort imm16 = 0;
   for (i = 0; i < 8; i++) {
      if (imm8 & (1 << i))
         imm16 |= (3 << (2*i));
   }
   IRTemp imm16_mask = newTemp(Ity_V128);
   assign( imm16_mask, mkV128( imm16 ));

   IRTemp res = newTemp(Ity_V128);
   assign ( res, binop( Iop_OrV128,
                        binop( Iop_AndV128, mkexpr(sV),
                                            mkexpr(imm16_mask) ),
                        binop( Iop_AndV128, mkexpr(dV),
                               unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) );
   return res;
}
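
/* Worked example, for illustration only: imm8 == 0xA5 (binary
   10100101) expands to imm16 == 0xCC33, so mkV128 builds a byte mask
   that takes 16-bit lanes 0, 2, 5 and 7 from sV and the remaining
   lanes from dV. */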

static IRTemp math_PMULUDQ_128 ( IRTemp sV, IRTemp dV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128,
                     binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)),
                     binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ));
   return res;
}
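
/* Note that only the even-numbered 32-bit lanes contribute: d0*s0
   forms the low 64-bit product and d2*s2 the high one, while lanes 1
   and 3 are ignored -- which is exactly the PMULUDQ lane pattern. */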

static IRTemp math_PMULUDQ_256 ( IRTemp sV, IRTemp dV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo );
   breakupV256toV128s( sV, &sHi, &sLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMULUDQ_128(sHi, dHi)),
                     mkexpr(math_PMULUDQ_128(sLo, dLo))));
   return res;
}

static IRTemp math_PMULDQ_128 ( IRTemp dV, IRTemp sV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128,
                     binop( Iop_MullS32, mkexpr(d2), mkexpr(s2)),
                     binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) ));
   return res;
}

static IRTemp math_PMULDQ_256 ( IRTemp sV, IRTemp dV )
{
   /* This is a really poor translation -- could be improved if
      performance critical */
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo );
   breakupV256toV128s( sV, &sHi, &sLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMULDQ_128(sHi, dHi)),
                     mkexpr(math_PMULDQ_128(sLo, dLo))));
   return res;
}

static IRTemp math_PMADDWD_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp sVhi, sVlo, dVhi, dVlo;
   IRTemp resHi = newTemp(Ity_I64);
   IRTemp resLo = newTemp(Ity_I64);
   sVhi = sVlo = dVhi = dVlo = IRTemp_INVALID;
   breakupV128to64s( sV, &sVhi, &sVlo );
   breakupV128to64s( dV, &dVhi, &dVlo );
   assign( resHi, mkIRExprCCall(Ity_I64, 0/*regparms*/,
                                "amd64g_calculate_mmx_pmaddwd",
                                &amd64g_calculate_mmx_pmaddwd,
                                mkIRExprVec_2( mkexpr(sVhi), mkexpr(dVhi))));
   assign( resLo, mkIRExprCCall(Ity_I64, 0/*regparms*/,
                                "amd64g_calculate_mmx_pmaddwd",
                                &amd64g_calculate_mmx_pmaddwd,
                                mkIRExprVec_2( mkexpr(sVlo), mkexpr(dVlo))));
   IRTemp res = newTemp(Ity_V128);
   assign( res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo))) ;
   return res;
}

static IRTemp math_PMADDWD_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo );
   breakupV256toV128s( sV, &sHi, &sLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMADDWD_128(dHi, sHi)),
                     mkexpr(math_PMADDWD_128(dLo, sLo))));
   return res;
}

static IRTemp math_ADDSUBPD_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp addV = newTemp(Ity_V128);
   IRTemp subV = newTemp(Ity_V128);
   IRTemp a1   = newTemp(Ity_I64);
   IRTemp s0   = newTemp(Ity_I64);
   IRTemp rm   = newTemp(Ity_I32);

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
   assign( s0, unop(Iop_V128to64,   mkexpr(subV) ));

   IRTemp res = newTemp(Ity_V128);
   assign( res, binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
   return res;
}
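
/* So the result carries the add outcome in its upper 64-bit lane (a1)
   and the subtract outcome in its lower 64-bit lane (s0) -- the
   ADDSUBPD pattern: subtract in lane 0, add in lane 1. */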

static IRTemp math_ADDSUBPD_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
   IRTemp addV = newTemp(Ity_V256);
   IRTemp subV = newTemp(Ity_V256);
   IRTemp rm   = newTemp(Ity_I32);
   a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   breakupV256to64s( addV, &a3, &a2, &a1, &a0 );
   breakupV256to64s( subV, &s3, &s2, &s1, &s0 );

   IRTemp res = newTemp(Ity_V256);
   assign( res, mkV256from64s( a3, s2, a1, s0 ) );
   return res;
}

static IRTemp math_ADDSUBPS_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
   IRTemp addV = newTemp(Ity_V128);
   IRTemp subV = newTemp(Ity_V128);
   IRTemp rm   = newTemp(Ity_I32);
   a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   breakupV128to32s( addV, &a3, &a2, &a1, &a0 );
   breakupV128to32s( subV, &s3, &s2, &s1, &s0 );

   IRTemp res = newTemp(Ity_V128);
   assign( res, mkV128from32s( a3, s2, a1, s0 ) );
   return res;
}

static IRTemp math_ADDSUBPS_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp a7, a6, a5, a4, a3, a2, a1, a0;
   IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
   IRTemp addV = newTemp(Ity_V256);
   IRTemp subV = newTemp(Ity_V256);
   IRTemp rm   = newTemp(Ity_I32);
   a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID;
   s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( addV, triop(Iop_Add32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
   assign( subV, triop(Iop_Sub32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );

   breakupV256to32s( addV, &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0 );
   breakupV256to32s( subV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );

   IRTemp res = newTemp(Ity_V256);
   assign( res, mkV256from32s( a7, s6, a5, s4, a3, s2, a1, s0 ) );
   return res;
}

/* Handle 128 bit PSHUFLW and PSHUFHW. */
static Long dis_PSHUFxW_128 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool isAvx, Bool xIsH )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   UInt   rG = gregOfRexRM(pfx,modrm);
   UInt   imm8;
   IRTemp sVmut, dVmut, sVcon, sV, dV, s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   sV    = newTemp(Ity_V128);
   dV    = newTemp(Ity_V128);
   sVmut = newTemp(Ity_I64);
   dVmut = newTemp(Ity_I64);
   sVcon = newTemp(Ity_I64);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      imm8 = (UInt)getUChar(delta+1);
      delta += 1+1;
      DIP("%spshuf%cw $%u,%s,%s\n",
          isAvx ? "v" : "", xIsH ? 'h' : 'l',
          imm8, nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      imm8 = (UInt)getUChar(delta+alen);
      delta += alen+1;
      DIP("%spshuf%cw $%u,%s,%s\n",
          isAvx ? "v" : "", xIsH ? 'h' : 'l',
          imm8, dis_buf, nameXMMReg(rG));
   }

   /* Get the to-be-changed (mut) and unchanging (con) bits of the
      source. */
   assign( sVmut, unop(xIsH ? Iop_V128HIto64 : Iop_V128to64,   mkexpr(sV)) );
   assign( sVcon, unop(xIsH ? Iop_V128to64   : Iop_V128HIto64, mkexpr(sV)) );

   breakup64to16s( sVmut, &s3, &s2, &s1, &s0 );
#  define SEL(n) \
             ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
   assign(dVmut, mk64from16s( SEL((imm8>>6)&3), SEL((imm8>>4)&3),
                              SEL((imm8>>2)&3), SEL((imm8>>0)&3) ));
#  undef SEL

   assign(dV, xIsH ? binop(Iop_64HLtoV128, mkexpr(dVmut), mkexpr(sVcon))
                   : binop(Iop_64HLtoV128, mkexpr(sVcon), mkexpr(dVmut)) );

   (isAvx ? putYMMRegLoAndZU : putXMMReg)(rG, mkexpr(dV));
   return delta;
}
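
/* Worked example, for illustration only: imm8 == 0xE4 selects fields
   3, 2, 1, 0 in that order, so dVmut is just sVmut unchanged and the
   instruction degenerates to a copy of the source register. */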

/* Handle 256 bit PSHUFLW and PSHUFHW. */
static Long dis_PSHUFxW_256 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool xIsH )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   UInt   rG = gregOfRexRM(pfx,modrm);
   UInt   imm8;
   IRTemp sV, s[8], sV64[4], dVhi, dVlo;
   sV64[3] = sV64[2] = sV64[1] = sV64[0] = IRTemp_INVALID;
   s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
   sV   = newTemp(Ity_V256);
   dVhi = newTemp(Ity_I64);
   dVlo = newTemp(Ity_I64);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getYMMReg(rE) );
      imm8 = (UInt)getUChar(delta+1);
      delta += 1+1;
      DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
          imm8, nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
      imm8 = (UInt)getUChar(delta+alen);
      delta += alen+1;
      DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
          imm8, dis_buf, nameYMMReg(rG));
   }

   breakupV256to64s( sV, &sV64[3], &sV64[2], &sV64[1], &sV64[0] );
   breakup64to16s( sV64[xIsH ? 3 : 2], &s[7], &s[6], &s[5], &s[4] );
   breakup64to16s( sV64[xIsH ? 1 : 0], &s[3], &s[2], &s[1], &s[0] );

   assign( dVhi, mk64from16s( s[4 + ((imm8>>6)&3)], s[4 + ((imm8>>4)&3)],
                              s[4 + ((imm8>>2)&3)], s[4 + ((imm8>>0)&3)] ) );
   assign( dVlo, mk64from16s( s[0 + ((imm8>>6)&3)], s[0 + ((imm8>>4)&3)],
                              s[0 + ((imm8>>2)&3)], s[0 + ((imm8>>0)&3)] ) );
   putYMMReg( rG, mkV256from64s( xIsH ? dVhi : sV64[3],
                                 xIsH ? sV64[2] : dVhi,
                                 xIsH ? dVlo : sV64[1],
                                 xIsH ? sV64[0] : dVlo ) );
   return delta;
}

static Long dis_PEXTRW_128_EregOnly_toG ( const VexAbiInfo* vbi, Prefix pfx,
                                          Long delta, Bool isAvx )
{
   Long   deltaIN = delta;
   UChar  modrm   = getUChar(delta);
   UInt   rG      = gregOfRexRM(pfx,modrm);
   IRTemp sV      = newTemp(Ity_V128);
   IRTemp d16     = newTemp(Ity_I16);
   UInt   imm8;
   IRTemp s0, s1, s2, s3;
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign(sV, getXMMReg(rE));
      imm8 = getUChar(delta+1) & 7;
      delta += 1+1;
      DIP("%spextrw $%u,%s,%s\n", isAvx ? "v" : "",
          imm8, nameXMMReg(rE), nameIReg32(rG));
   } else {
      /* The memory case is disallowed, apparently. */
      return deltaIN; /* FAIL */
   }
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   switch (imm8) {
      case 0:  assign(d16, unop(Iop_32to16,   mkexpr(s0))); break;
      case 1:  assign(d16, unop(Iop_32HIto16, mkexpr(s0))); break;
      case 2:  assign(d16, unop(Iop_32to16,   mkexpr(s1))); break;
      case 3:  assign(d16, unop(Iop_32HIto16, mkexpr(s1))); break;
      case 4:  assign(d16, unop(Iop_32to16,   mkexpr(s2))); break;
      case 5:  assign(d16, unop(Iop_32HIto16, mkexpr(s2))); break;
      case 6:  assign(d16, unop(Iop_32to16,   mkexpr(s3))); break;
      case 7:  assign(d16, unop(Iop_32HIto16, mkexpr(s3))); break;
      default: vassert(0);
   }
   putIReg32(rG, unop(Iop_16Uto32, mkexpr(d16)));
   return delta;
}

static Long dis_CVTDQ2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   IRTemp arg64 = newTemp(Ity_I64);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   const HChar* mbV = isAvx ? "v" : "";
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( arg64, getXMMRegLane64(rE, 0) );
      delta += 1;
      DIP("%scvtdq2pd %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
      delta += alen;
      DIP("%scvtdq2pd %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
   }
   putXMMRegLane64F(
      rG, 0,
      unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
   );
   putXMMRegLane64F(
      rG, 1,
      unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
   );
   if (isAvx)
      putYMMRegLane128(rG, 1, mkV128(0));
   return delta;
}

static Long dis_STMXCSR ( const VexAbiInfo* vbi, Prefix pfx,
                          Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   vassert(!epartIsReg(modrm)); /* ensured by caller */
   vassert(gregOfRexRM(pfx,modrm) == 3); /* ditto */

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;

   /* Fake up a native SSE mxcsr word.  The only thing it depends on
      is SSEROUND[1:0], so call a clean helper to cook it up.
   */
   /* ULong amd64g_create_mxcsr ( ULong sseround ) */
   DIP("%sstmxcsr %s\n",  isAvx ? "v" : "", dis_buf);
   storeLE(
      mkexpr(addr),
      unop(Iop_64to32,
           mkIRExprCCall(
              Ity_I64, 0/*regp*/,
              "amd64g_create_mxcsr", &amd64g_create_mxcsr,
              mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) )
           )
      )
   );
   return delta;
}

static Long dis_LDMXCSR ( const VexAbiInfo* vbi, Prefix pfx,
                          Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   vassert(!epartIsReg(modrm)); /* ensured by caller */
   vassert(gregOfRexRM(pfx,modrm) == 2); /* ditto */

   IRTemp t64 = newTemp(Ity_I64);
   IRTemp ew  = newTemp(Ity_I32);

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;
   DIP("%sldmxcsr %s\n",  isAvx ? "v" : "", dis_buf);

   /* The only thing we observe in %mxcsr is the rounding mode.
      Therefore, pass the 32-bit value (SSE native-format control
      word) to a clean helper, getting back a 64-bit value, the
      lower half of which is the SSEROUND value to store, and the
      upper half of which is the emulation-warning token which may
      be generated.
   */
   /* ULong amd64g_check_ldmxcsr ( ULong ); */
   assign( t64, mkIRExprCCall(
                   Ity_I64, 0/*regparms*/,
                   "amd64g_check_ldmxcsr",
                   &amd64g_check_ldmxcsr,
                   mkIRExprVec_1(
                      unop(Iop_32Uto64,
                           loadLE(Ity_I32, mkexpr(addr))
                      )
                   )
                )
         );

   put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
   assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
   put_emwarn( mkexpr(ew) );
   /* Finally, if an emulation warning was reported, side-exit to
      the next insn, reporting the warning, so that Valgrind's
      dispatcher sees the warning. */
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)),
         Ijk_EmWarn,
         IRConst_U64(guest_RIP_bbstart+delta),
         OFFB_RIP
      )
   );
   return delta;
}

static void gen_XSAVE_SEQUENCE ( IRTemp addr, IRTemp rfbm )
{
   /* ------ rfbm[0] gates the x87 state ------ */

   /* Uses dirty helper:
         void amd64g_do_XSAVE_COMPONENT_0 ( VexGuestAMD64State*, ULong )
   */
   IRDirty* d0 = unsafeIRDirty_0_N (
                    0/*regparms*/,
                    "amd64g_dirtyhelper_XSAVE_COMPONENT_0",
                    &amd64g_dirtyhelper_XSAVE_COMPONENT_0,
                    mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
                 );
   d0->guard = binop(Iop_CmpEQ64, binop(Iop_And64, mkexpr(rfbm), mkU64(1)),
                     mkU64(1));

   /* Declare we're writing memory.  Really, bytes 24 through 31
      (MXCSR and MXCSR_MASK) aren't written, but we can't express more
      than 1 memory area here, so just mark the whole thing as
      written. */
   d0->mFx   = Ifx_Write;
   d0->mAddr = mkexpr(addr);
   d0->mSize = 160;

   /* declare we're reading guest state */
   d0->nFxState = 5;
   vex_bzero(&d0->fxState, sizeof(d0->fxState));

   d0->fxState[0].fx     = Ifx_Read;
   d0->fxState[0].offset = OFFB_FTOP;
   d0->fxState[0].size   = sizeof(UInt);

   d0->fxState[1].fx     = Ifx_Read;
   d0->fxState[1].offset = OFFB_FPREGS;
   d0->fxState[1].size   = 8 * sizeof(ULong);

   d0->fxState[2].fx     = Ifx_Read;
   d0->fxState[2].offset = OFFB_FPTAGS;
   d0->fxState[2].size   = 8 * sizeof(UChar);

   d0->fxState[3].fx     = Ifx_Read;
   d0->fxState[3].offset = OFFB_FPROUND;
   d0->fxState[3].size   = sizeof(ULong);

   d0->fxState[4].fx     = Ifx_Read;
   d0->fxState[4].offset = OFFB_FC3210;
   d0->fxState[4].size   = sizeof(ULong);

   stmt( IRStmt_Dirty(d0) );

   /* ------ rfbm[1] gates the SSE state ------ */

   IRTemp rfbm_1    = newTemp(Ity_I64);
   IRTemp rfbm_1or2 = newTemp(Ity_I64);
   assign(rfbm_1,    binop(Iop_And64, mkexpr(rfbm), mkU64(2)));
   assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6)));

   IRExpr* guard_1    = binop(Iop_CmpEQ64, mkexpr(rfbm_1),    mkU64(2));
   IRExpr* guard_1or2 = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0));

   /* Uses dirty helper:
         void amd64g_do_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
                 ( VexGuestAMD64State*, ULong )
      This creates only MXCSR and MXCSR_MASK.  We need to do this if
      either components 1 (SSE) or 2 (AVX) are requested.  Hence the
      guard condition is a bit more complex.
   */
   IRDirty* d1 = unsafeIRDirty_0_N (
                    0/*regparms*/,
                    "amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS",
                    &amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS,
                    mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
                 );
   d1->guard = guard_1or2;

   /* Declare we're writing memory: MXCSR and MXCSR_MASK.  Note that
      the code for rbfm[0] just above claims a write of 0 .. 159, so
      this duplicates it.  But at least correctly connects 24 .. 31 to
      the MXCSR guest state representation (SSEROUND field). */
   d1->mFx   = Ifx_Write;
   d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24));
   d1->mSize = 8;

   /* declare we're reading guest state */
   d1->nFxState = 1;
   vex_bzero(&d1->fxState, sizeof(d1->fxState));

   d1->fxState[0].fx     = Ifx_Read;
   d1->fxState[0].offset = OFFB_SSEROUND;
   d1->fxState[0].size   = sizeof(ULong);

   /* Call the helper.  This creates MXCSR and MXCSR_MASK but nothing
      else.  We do the actual register array, XMM[0..15], separately,
      in order that any undefinedness in the XMM registers is tracked
      separately by Memcheck and does not "infect" the in-memory
      shadow for the other parts of the image. */
   stmt( IRStmt_Dirty(d1) );

   /* And now the XMMs themselves. */
   UInt reg;
   for (reg = 0; reg < 16; reg++) {
      stmt( IRStmt_StoreG(
               Iend_LE,
               binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16)),
               getXMMReg(reg),
               guard_1
      ));
   }

   /* ------ rfbm[2] gates the AVX state ------ */
   /* Component 2 is just a bunch of register saves, so we'll do it
      inline, just to be simple and to be Memcheck friendly. */

   IRTemp rfbm_2 = newTemp(Ity_I64);
   assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4)));

   IRExpr* guard_2 = binop(Iop_CmpEQ64, mkexpr(rfbm_2), mkU64(4));

   for (reg = 0; reg < 16; reg++) {
      stmt( IRStmt_StoreG(
               Iend_LE,
               binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16)),
               getYMMRegLane128(reg,1),
               guard_2
      ));
   }
}
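
/* For orientation: the sequence above lays the image out as follows.
   Bytes 0 .. 159 (the legacy x87 area, including the MXCSR slot at
   24 .. 31) are produced by the two dirty helpers, bytes 160 + reg*16
   receive XMM0..XMM15, and bytes 576 + reg*16 receive the upper
   128-bit halves of YMM0..YMM15.  The XSTATE_BV field of the XSAVE
   header at offset 512 is updated by the caller (see dis_XSAVE). */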

static Long dis_XSAVE ( const VexAbiInfo* vbi,
                        Prefix pfx, Long delta, Int sz )
{
   /* Note that the presence or absence of REX.W (indicated here by
      |sz|) slightly affects the written format: whether the saved FPU
      IP and DP pointers are 64 or 32 bits.  But the helper function
      we call simply writes zero bits in the relevant fields, which
      are 64 bits regardless of what REX.W is, and so it's good enough
      (iow, equally broken) in both cases. */
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   vassert(!epartIsReg(modrm)); /* ensured by caller */
   vassert(sz == 4 || sz == 8); /* ditto */

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;
   gen_SEGV_if_not_64_aligned(addr);

   DIP("%sxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);

   /* VEX's caller is assumed to have checked this. */
   const ULong aSSUMED_XCR0_VALUE = 7;

   IRTemp rfbm = newTemp(Ity_I64);
   assign(rfbm,
          binop(Iop_And64,
                binop(Iop_Or64,
                      binop(Iop_Shl64,
                            unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)),
                      unop(Iop_32Uto64, getIRegRAX(4))),
                mkU64(aSSUMED_XCR0_VALUE)));

   gen_XSAVE_SEQUENCE(addr, rfbm);

   /* Finally, we need to update XSTATE_BV in the XSAVE header area, by
      OR-ing the RFBM value into it. */
   IRTemp addr_plus_512 = newTemp(Ity_I64);
   assign(addr_plus_512, binop(Iop_Add64, mkexpr(addr), mkU64(512)));
   storeLE( mkexpr(addr_plus_512),
            binop(Iop_Or8,
                  unop(Iop_64to8, mkexpr(rfbm)),
                  loadLE(Ity_I8, mkexpr(addr_plus_512))) );

   return delta;
}

static Long dis_FXSAVE ( const VexAbiInfo* vbi,
                         Prefix pfx, Long delta, Int sz )
{
   /* See comment in dis_XSAVE about the significance of REX.W. */
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   vassert(!epartIsReg(modrm)); /* ensured by caller */
   vassert(sz == 4 || sz == 8); /* ditto */

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;
   gen_SEGV_if_not_16_aligned(addr);

   DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);

   /* FXSAVE is just XSAVE with components 0 and 1 selected.  Set rfbm
      to 0b011, generate the XSAVE sequence accordingly, and let iropt
      fold out the unused (AVX) parts accordingly. */
   IRTemp rfbm = newTemp(Ity_I64);
   assign(rfbm, mkU64(3));
   gen_XSAVE_SEQUENCE(addr, rfbm);

   return delta;
}

static void gen_XRSTOR_SEQUENCE ( IRTemp addr, IRTemp xstate_bv, IRTemp rfbm )
{
   /* ------ rfbm[0] gates the x87 state ------ */

   /* If rfbm[0] == 1, we have to write the x87 state.  If
      xstate_bv[0] == 1, we will read it from the memory image, else
      we'll set it to initial values.  Doing this with a helper
      function and getting the definedness flow annotations correct is
      too difficult, so generate stupid but simple code: first set the
      registers to initial values, regardless of xstate_bv[0].  Then,
      conditionally restore from the memory image. */

   IRTemp rfbm_0      = newTemp(Ity_I64);
   IRTemp xstate_bv_0 = newTemp(Ity_I64);
   IRTemp restore_0   = newTemp(Ity_I64);
   assign(rfbm_0,      binop(Iop_And64, mkexpr(rfbm), mkU64(1)));
   assign(xstate_bv_0, binop(Iop_And64, mkexpr(xstate_bv), mkU64(1)));
   assign(restore_0,   binop(Iop_And64, mkexpr(rfbm_0), mkexpr(xstate_bv_0)));

   gen_FINIT_SEQUENCE( binop(Iop_CmpNE64, mkexpr(rfbm_0), mkU64(0)) );

   /* Uses dirty helper:
         void amd64g_do_XRSTOR_COMPONENT_0 ( VexGuestAMD64State*, ULong )
   */
   IRDirty* d0 = unsafeIRDirty_0_N (
                    0/*regparms*/,
                    "amd64g_dirtyhelper_XRSTOR_COMPONENT_0",
                    &amd64g_dirtyhelper_XRSTOR_COMPONENT_0,
                    mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
                 );
   d0->guard = binop(Iop_CmpNE64, mkexpr(restore_0), mkU64(0));

   /* Declare we're reading memory.  Really, bytes 24 through 31
      (MXCSR and MXCSR_MASK) aren't read, but we can't express more
      than 1 memory area here, so just mark the whole thing as
      read. */
   d0->mFx   = Ifx_Read;
   d0->mAddr = mkexpr(addr);
   d0->mSize = 160;

   /* declare we're writing guest state */
   d0->nFxState = 5;
   vex_bzero(&d0->fxState, sizeof(d0->fxState));

   d0->fxState[0].fx     = Ifx_Write;
   d0->fxState[0].offset = OFFB_FTOP;
   d0->fxState[0].size   = sizeof(UInt);

   d0->fxState[1].fx     = Ifx_Write;
   d0->fxState[1].offset = OFFB_FPREGS;
   d0->fxState[1].size   = 8 * sizeof(ULong);

   d0->fxState[2].fx     = Ifx_Write;
   d0->fxState[2].offset = OFFB_FPTAGS;
   d0->fxState[2].size   = 8 * sizeof(UChar);

   d0->fxState[3].fx     = Ifx_Write;
   d0->fxState[3].offset = OFFB_FPROUND;
   d0->fxState[3].size   = sizeof(ULong);

   d0->fxState[4].fx     = Ifx_Write;
   d0->fxState[4].offset = OFFB_FC3210;
   d0->fxState[4].size   = sizeof(ULong);

   stmt( IRStmt_Dirty(d0) );

   /* ------ rfbm[1] gates the SSE state ------ */

   /* Same scheme as component 0: first zero it out, and then possibly
      restore from the memory area. */
   IRTemp rfbm_1      = newTemp(Ity_I64);
   IRTemp xstate_bv_1 = newTemp(Ity_I64);
   IRTemp restore_1   = newTemp(Ity_I64);
   assign(rfbm_1,      binop(Iop_And64, mkexpr(rfbm), mkU64(2)));
   assign(xstate_bv_1, binop(Iop_And64, mkexpr(xstate_bv), mkU64(2)));
   assign(restore_1,   binop(Iop_And64, mkexpr(rfbm_1), mkexpr(xstate_bv_1)));
   IRExpr* rfbm_1e    = binop(Iop_CmpNE64, mkexpr(rfbm_1),    mkU64(0));
   IRExpr* restore_1e = binop(Iop_CmpNE64, mkexpr(restore_1), mkU64(0));

   IRTemp rfbm_1or2      = newTemp(Ity_I64);
   IRTemp xstate_bv_1or2 = newTemp(Ity_I64);
   IRTemp restore_1or2   = newTemp(Ity_I64);
   assign(rfbm_1or2,      binop(Iop_And64, mkexpr(rfbm), mkU64(6)));
   assign(xstate_bv_1or2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(6)));
   assign(restore_1or2,   binop(Iop_And64, mkexpr(rfbm_1or2),
                                           mkexpr(xstate_bv_1or2)));
   IRExpr* rfbm_1or2e    = binop(Iop_CmpNE64, mkexpr(rfbm_1or2),    mkU64(0));
   IRExpr* restore_1or2e = binop(Iop_CmpNE64, mkexpr(restore_1or2), mkU64(0));

   /* The areas in question are: SSEROUND, and the XMM register array. */
   putGuarded(OFFB_SSEROUND, rfbm_1or2e, mkU64(Irrm_NEAREST));

   UInt reg;
   for (reg = 0; reg < 16; reg++) {
      putGuarded(xmmGuestRegOffset(reg), rfbm_1e, mkV128(0));
   }

   /* And now possibly restore from MXCSR/MXCSR_MASK */
   /* Uses dirty helper:
         void amd64g_do_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
                 ( VexGuestAMD64State*, ULong )
      This restores from only MXCSR and MXCSR_MASK.  We need to do
      this if either components 1 (SSE) or 2 (AVX) are requested.
      Hence the guard condition is a bit more complex.
   */
   IRDirty* d1 = unsafeIRDirty_0_N (
                    0/*regparms*/,
                    "amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS",
                    &amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS,
                    mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
                 );
   d1->guard = restore_1or2e;

   /* Declare we're reading memory: MXCSR and MXCSR_MASK.  Note that
      the code for rbfm[0] just above claims a read of 0 .. 159, so
      this duplicates it.  But at least correctly connects 24 .. 31 to
      the MXCSR guest state representation (SSEROUND field). */
   d1->mFx   = Ifx_Read;
   d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24));
   d1->mSize = 8;

   /* declare we're writing guest state */
   d1->nFxState = 1;
   vex_bzero(&d1->fxState, sizeof(d1->fxState));

   d1->fxState[0].fx     = Ifx_Write;
   d1->fxState[0].offset = OFFB_SSEROUND;
   d1->fxState[0].size   = sizeof(ULong);

   /* Call the helper.  This creates SSEROUND but nothing
      else.  We do the actual register array, XMM[0..15], separately,
      in order that any undefinedness in the XMM registers is tracked
      separately by Memcheck and is not "infected" by the in-memory
      shadow for the other parts of the image. */
   stmt( IRStmt_Dirty(d1) );

   /* And now the XMMs themselves.  For each register, we PUT either
      its old value, or the value loaded from memory.  One convenient
      way to do that is with a conditional load that has as its
      default value the old value of the register. */
   for (reg = 0; reg < 16; reg++) {
      IRExpr* ea  = binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16));
      IRExpr* alt = getXMMReg(reg);
      IRTemp  loadedValue = newTemp(Ity_V128);
      stmt( IRStmt_LoadG(Iend_LE,
                         ILGop_IdentV128,
                         loadedValue, ea, alt, restore_1e) );
      putXMMReg(reg, mkexpr(loadedValue));
   }

   /* ------ rfbm[2] gates the AVX state ------ */
   /* Component 2 is just a bunch of register loads, so we'll do it
      inline, just to be simple and to be Memcheck friendly. */

   /* Same scheme as component 0: first zero it out, and then possibly
      restore from the memory area. */
   IRTemp rfbm_2      = newTemp(Ity_I64);
   IRTemp xstate_bv_2 = newTemp(Ity_I64);
   IRTemp restore_2   = newTemp(Ity_I64);
   assign(rfbm_2,      binop(Iop_And64, mkexpr(rfbm), mkU64(4)));
   assign(xstate_bv_2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(4)));
   assign(restore_2,   binop(Iop_And64, mkexpr(rfbm_2), mkexpr(xstate_bv_2)));

   IRExpr* rfbm_2e    = binop(Iop_CmpNE64, mkexpr(rfbm_2),    mkU64(0));
   IRExpr* restore_2e = binop(Iop_CmpNE64, mkexpr(restore_2), mkU64(0));

   for (reg = 0; reg < 16; reg++) {
      putGuarded(ymmGuestRegLane128offset(reg, 1), rfbm_2e, mkV128(0));
   }

   for (reg = 0; reg < 16; reg++) {
      IRExpr* ea  = binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16));
      IRExpr* alt = getYMMRegLane128(reg, 1);
      IRTemp  loadedValue = newTemp(Ity_V128);
      stmt( IRStmt_LoadG(Iend_LE,
                         ILGop_IdentV128,
                         loadedValue, ea, alt, restore_2e) );
      putYMMRegLane128(reg, 1, mkexpr(loadedValue));
   }
}
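
/* For orientation: for each component, the sequence above first resets
   the guest state to initial values whenever the corresponding rfbm
   bit is set, and then restores from the memory image only when the
   matching xstate_bv bit is also set -- that conjunction is what the
   restore_* guards express. */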

static Long dis_XRSTOR ( const VexAbiInfo* vbi,
                         Prefix pfx, Long delta, Int sz )
{
   /* As with XSAVE above we ignore the value of REX.W since we're
      not bothering with the FPU DP and IP fields. */
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   vassert(!epartIsReg(modrm)); /* ensured by caller */
   vassert(sz == 4 || sz == 8); /* ditto */

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;
   gen_SEGV_if_not_64_aligned(addr);

   DIP("%sxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);

   /* VEX's caller is assumed to have checked this. */
   const ULong aSSUMED_XCR0_VALUE = 7;

   IRTemp rfbm = newTemp(Ity_I64);
   assign(rfbm,
          binop(Iop_And64,
                binop(Iop_Or64,
                      binop(Iop_Shl64,
                            unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)),
                      unop(Iop_32Uto64, getIRegRAX(4))),
                mkU64(aSSUMED_XCR0_VALUE)));

   IRTemp xstate_bv = newTemp(Ity_I64);
   assign(xstate_bv, loadLE(Ity_I64,
                            binop(Iop_Add64, mkexpr(addr), mkU64(512+0))));

   IRTemp xcomp_bv = newTemp(Ity_I64);
   assign(xcomp_bv, loadLE(Ity_I64,
                           binop(Iop_Add64, mkexpr(addr), mkU64(512+8))));

   IRTemp xsavehdr_23_16 = newTemp(Ity_I64);
   assign( xsavehdr_23_16,
           loadLE(Ity_I64,
                  binop(Iop_Add64, mkexpr(addr), mkU64(512+16))));

   /* We must fault if
      * xcomp_bv[63] == 1, since this simulated CPU does not support
        the compaction extension.
      * xstate_bv sets a bit outside of XCR0 (which we assume to be 7).
      * any of the xsave header bytes 23 .. 8 are nonzero.  This seems to
        imply that xcomp_bv must be zero.
      xcomp_bv is header bytes 15 .. 8 and xstate_bv is header bytes 7 .. 0
   */
   IRTemp fault_if_nonzero = newTemp(Ity_I64);
   assign(fault_if_nonzero,
          binop(Iop_Or64,
                binop(Iop_And64, mkexpr(xstate_bv), mkU64(~aSSUMED_XCR0_VALUE)),
                binop(Iop_Or64, mkexpr(xcomp_bv), mkexpr(xsavehdr_23_16))));
   stmt( IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(fault_if_nonzero), mkU64(0)),
                     Ijk_SigSEGV,
                     IRConst_U64(guest_RIP_curr_instr),
                     OFFB_RIP
   ));

   /* We are guaranteed now that both xstate_bv and rfbm are in the
      range 0 .. 7.  Generate the restore sequence proper. */
   gen_XRSTOR_SEQUENCE(addr, xstate_bv, rfbm);

   return delta;
}

static Long dis_FXRSTOR ( const VexAbiInfo* vbi,
                          Prefix pfx, Long delta, Int sz )
{
   /* As with FXSAVE above we ignore the value of REX.W since we're
      not bothering with the FPU DP and IP fields. */
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   vassert(!epartIsReg(modrm)); /* ensured by caller */
   vassert(sz == 4 || sz == 8); /* ditto */

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;
   gen_SEGV_if_not_16_aligned(addr);

   DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);

   /* FXRSTOR is just XRSTOR with components 0 and 1 selected and also
      as if components 0 and 1 are set as present in XSTATE_BV in the
      XSAVE header.  Set both rfbm and xstate_bv to 0b011 therefore,
      generate the XRSTOR sequence accordingly, and let iropt fold out
      the unused (AVX) parts accordingly. */
   IRTemp three = newTemp(Ity_I64);
   assign(three, mkU64(3));
   gen_XRSTOR_SEQUENCE(addr, three/*xstate_bv*/, three/*rfbm*/);

   return delta;
}

static IRTemp math_PINSRW_128 ( IRTemp v128, IRTemp u16, UInt imm8 )
{
   vassert(imm8 <= 7);

   // Create a V128 value which has the selected word in the
   // specified lane, and zeroes everywhere else.
   IRTemp tmp128    = newTemp(Ity_V128);
   IRTemp halfshift = newTemp(Ity_I64);
   assign(halfshift, binop(Iop_Shl64,
                           unop(Iop_16Uto64, mkexpr(u16)),
                           mkU8(16 * (imm8 & 3))));
   if (imm8 < 4) {
      assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
   } else {
      assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
   }

   UShort mask = ~(3 << (imm8 * 2));
   IRTemp res  = newTemp(Ity_V128);
   assign( res, binop(Iop_OrV128,
                      mkexpr(tmp128),
                      binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
   return res;
}
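
/* Worked example, for illustration only: with imm8 == 5, halfshift
   places u16 at bits 16..31 of the upper 64-bit half (bytes 10 and 11
   of the vector), and mask == (UShort)~(3 << 10) == 0xF3FF, so
   mkV128(mask) keeps every byte of v128 except bytes 10 and 11, which
   take the newly inserted word. */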

static IRTemp math_PSADBW_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp s1, s0, d1, d0;
   s1 = s0 = d1 = d0 = IRTemp_INVALID;

   breakupV128to64s( sV, &s1, &s0 );
   breakupV128to64s( dV, &d1, &d0 );

   IRTemp res = newTemp(Ity_V128);
   assign( res,
           binop(Iop_64HLtoV128,
                 mkIRExprCCall(Ity_I64, 0/*regparms*/,
                               "amd64g_calculate_mmx_psadbw",
                               &amd64g_calculate_mmx_psadbw,
                               mkIRExprVec_2( mkexpr(s1), mkexpr(d1))),
                 mkIRExprCCall(Ity_I64, 0/*regparms*/,
                               "amd64g_calculate_mmx_psadbw",
                               &amd64g_calculate_mmx_psadbw,
                               mkIRExprVec_2( mkexpr(s0), mkexpr(d0)))) );
   return res;
}

static IRTemp math_PSADBW_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo );
   breakupV256toV128s( sV, &sHi, &sLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PSADBW_128(dHi, sHi)),
                     mkexpr(math_PSADBW_128(dLo, sLo))));
   return res;
}

static Long dis_MASKMOVDQU ( const VexAbiInfo* vbi, Prefix pfx,
                             Long delta, Bool isAvx )
{
   IRTemp regD    = newTemp(Ity_V128);
   IRTemp mask    = newTemp(Ity_V128);
   IRTemp olddata = newTemp(Ity_V128);
   IRTemp newdata = newTemp(Ity_V128);
   IRTemp addr    = newTemp(Ity_I64);
   UChar  modrm   = getUChar(delta);
   UInt   rG      = gregOfRexRM(pfx,modrm);
   UInt   rE      = eregOfRexRM(pfx,modrm);

   assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
   assign( regD, getXMMReg( rG ));

   /* Unfortunately can't do the obvious thing with SarN8x16
      here since that can't be re-emitted as SSE2 code - no such
      insn. */
   assign( mask,
           binop(Iop_64HLtoV128,
                 binop(Iop_SarN8x8,
                       getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ),
                       mkU8(7) ),
                 binop(Iop_SarN8x8,
                       getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ),
                       mkU8(7) ) ));
   assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
   assign( newdata, binop(Iop_OrV128,
                          binop(Iop_AndV128,
                                mkexpr(regD),
                                mkexpr(mask) ),
                          binop(Iop_AndV128,
                                mkexpr(olddata),
                                unop(Iop_NotV128, mkexpr(mask)))) );
   storeLE( mkexpr(addr), mkexpr(newdata) );

   delta += 1;
   DIP("%smaskmovdqu %s,%s\n", isAvx ? "v" : "",
       nameXMMReg(rE), nameXMMReg(rG) );
   return delta;
}

static Long dis_MOVMSKPS_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   UChar modrm = getUChar(delta);
   UInt   rG   = gregOfRexRM(pfx,modrm);
   UInt   rE   = eregOfRexRM(pfx,modrm);
   IRTemp t0   = newTemp(Ity_I32);
   IRTemp t1   = newTemp(Ity_I32);
   IRTemp t2   = newTemp(Ity_I32);
   IRTemp t3   = newTemp(Ity_I32);
   delta += 1;
   assign( t0, binop( Iop_And32,
                      binop(Iop_Shr32, getXMMRegLane32(rE,0), mkU8(31)),
                      mkU32(1) ));
   assign( t1, binop( Iop_And32,
                      binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(30)),
                      mkU32(2) ));
   assign( t2, binop( Iop_And32,
                      binop(Iop_Shr32, getXMMRegLane32(rE,2), mkU8(29)),
                      mkU32(4) ));
   assign( t3, binop( Iop_And32,
                      binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(28)),
                      mkU32(8) ));
   putIReg32( rG, binop(Iop_Or32,
                        binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
                        binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
   DIP("%smovmskps %s,%s\n", isAvx ? "v" : "",
       nameXMMReg(rE), nameIReg32(rG));
   return delta;
}
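
/* Each lane's sign bit is shifted down by (31 - k) for lane k and then
   masked with (1 << k), so lane k's sign bit lands in bit k of the
   result -- the standard MOVMSKPS bit layout. */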

static Long dis_MOVMSKPS_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
{
   UChar modrm = getUChar(delta);
   UInt   rG   = gregOfRexRM(pfx,modrm);
   UInt   rE   = eregOfRexRM(pfx,modrm);
   IRTemp t0   = newTemp(Ity_I32);
   IRTemp t1   = newTemp(Ity_I32);
   IRTemp t2   = newTemp(Ity_I32);
   IRTemp t3   = newTemp(Ity_I32);
   IRTemp t4   = newTemp(Ity_I32);
   IRTemp t5   = newTemp(Ity_I32);
   IRTemp t6   = newTemp(Ity_I32);
   IRTemp t7   = newTemp(Ity_I32);
   delta += 1;
   assign( t0, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,0), mkU8(31)),
                      mkU32(1) ));
   assign( t1, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(30)),
                      mkU32(2) ));
   assign( t2, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,2), mkU8(29)),
                      mkU32(4) ));
   assign( t3, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(28)),
                      mkU32(8) ));
   assign( t4, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,4), mkU8(27)),
                      mkU32(16) ));
   assign( t5, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(26)),
                      mkU32(32) ));
   assign( t6, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,6), mkU8(25)),
                      mkU32(64) ));
   assign( t7, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(24)),
                      mkU32(128) ));
   putIReg32( rG, binop(Iop_Or32,
                        binop(Iop_Or32,
                              binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
                              binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ),
                        binop(Iop_Or32,
                              binop(Iop_Or32, mkexpr(t4), mkexpr(t5)),
                              binop(Iop_Or32, mkexpr(t6), mkexpr(t7)) ) ) );
   DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
   return delta;
}

static Long dis_MOVMSKPD_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   UChar modrm = getUChar(delta);
   UInt   rG   = gregOfRexRM(pfx,modrm);
   UInt   rE   = eregOfRexRM(pfx,modrm);
   IRTemp t0   = newTemp(Ity_I32);
   IRTemp t1   = newTemp(Ity_I32);
   delta += 1;
   assign( t0, binop( Iop_And32,
                      binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(31)),
                      mkU32(1) ));
   assign( t1, binop( Iop_And32,
                      binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(30)),
                      mkU32(2) ));
   putIReg32( rG, binop(Iop_Or32, mkexpr(t0), mkexpr(t1) ) );
   DIP("%smovmskpd %s,%s\n", isAvx ? "v" : "",
       nameXMMReg(rE), nameIReg32(rG));
   return delta;
}

static Long dis_MOVMSKPD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
{
   UChar modrm = getUChar(delta);
   UInt   rG   = gregOfRexRM(pfx,modrm);
   UInt   rE   = eregOfRexRM(pfx,modrm);
   IRTemp t0   = newTemp(Ity_I32);
   IRTemp t1   = newTemp(Ity_I32);
   IRTemp t2   = newTemp(Ity_I32);
   IRTemp t3   = newTemp(Ity_I32);
   delta += 1;
   assign( t0, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(31)),
                      mkU32(1) ));
   assign( t1, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(30)),
                      mkU32(2) ));
   assign( t2, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(29)),
                      mkU32(4) ));
   assign( t3, binop( Iop_And32,
                      binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(28)),
                      mkU32(8) ));
   putIReg32( rG, binop(Iop_Or32,
                        binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
                        binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
   DIP("vmovmskpd %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
   return delta;
}

12442 /* Note, this also handles SSE(1) insns. */
12443 __attribute__((noinline
))
12445 Long
dis_ESC_0F__SSE2 ( Bool
* decode_OK
,
12446 const VexArchInfo
* archinfo
,
12447 const VexAbiInfo
* vbi
,
12448 Prefix pfx
, Int sz
, Long deltaIN
,
12451 IRTemp addr
= IRTemp_INVALID
;
12452 IRTemp t0
= IRTemp_INVALID
;
12453 IRTemp t1
= IRTemp_INVALID
;
12454 IRTemp t2
= IRTemp_INVALID
;
12455 IRTemp t3
= IRTemp_INVALID
;
12456 IRTemp t4
= IRTemp_INVALID
;
12457 IRTemp t5
= IRTemp_INVALID
;
12458 IRTemp t6
= IRTemp_INVALID
;
12463 *decode_OK
= False
;
12465 Long delta
= deltaIN
;
12466 UChar opc
= getUChar(delta
);
12471 if (have66noF2noF3(pfx
)
12472 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12473 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
12474 modrm
= getUChar(delta
);
12475 if (epartIsReg(modrm
)) {
12476 putXMMReg( gregOfRexRM(pfx
,modrm
),
12477 getXMMReg( eregOfRexRM(pfx
,modrm
) ));
12478 DIP("movupd %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12479 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12482 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12483 putXMMReg( gregOfRexRM(pfx
,modrm
),
12484 loadLE(Ity_V128
, mkexpr(addr
)) );
12485 DIP("movupd %s,%s\n", dis_buf
,
12486 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12489 goto decode_success
;
12491 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
12492 G (lo half xmm). If E is mem, upper half of G is zeroed out.
12493 If E is reg, upper half of G is unchanged. */
12494 if (haveF2no66noF3(pfx
)
12495 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8) ) {
12496 modrm
= getUChar(delta
);
12497 if (epartIsReg(modrm
)) {
12498 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 0,
12499 getXMMRegLane64( eregOfRexRM(pfx
,modrm
), 0 ));
12500 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12501 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12504 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12505 putXMMReg( gregOfRexRM(pfx
,modrm
), mkV128(0) );
12506 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 0,
12507 loadLE(Ity_I64
, mkexpr(addr
)) );
12508 DIP("movsd %s,%s\n", dis_buf
,
12509 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12512 goto decode_success
;
12514 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
12515 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
12516 if (haveF3no66noF2(pfx
)
12517 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12518 modrm
= getUChar(delta
);
12519 if (epartIsReg(modrm
)) {
12520 putXMMRegLane32( gregOfRexRM(pfx
,modrm
), 0,
12521 getXMMRegLane32( eregOfRexRM(pfx
,modrm
), 0 ));
12522 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12523 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12526 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12527 putXMMReg( gregOfRexRM(pfx
,modrm
), mkV128(0) );
12528 putXMMRegLane32( gregOfRexRM(pfx
,modrm
), 0,
12529 loadLE(Ity_I32
, mkexpr(addr
)) );
12530 DIP("movss %s,%s\n", dis_buf
,
12531 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12534 goto decode_success
;
12536 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
12537 if (haveNo66noF2noF3(pfx
)
12538 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12539 modrm
= getUChar(delta
);
12540 if (epartIsReg(modrm
)) {
12541 putXMMReg( gregOfRexRM(pfx
,modrm
),
12542 getXMMReg( eregOfRexRM(pfx
,modrm
) ));
12543 DIP("movups %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12544 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12547 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12548 putXMMReg( gregOfRexRM(pfx
,modrm
),
12549 loadLE(Ity_V128
, mkexpr(addr
)) );
12550 DIP("movups %s,%s\n", dis_buf
,
12551 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12554 goto decode_success
;
12559 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
12560 or lo half xmm). */
12561 if (haveF2no66noF3(pfx
)
12562 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12563 modrm
= getUChar(delta
);
12564 if (epartIsReg(modrm
)) {
12565 putXMMRegLane64( eregOfRexRM(pfx
,modrm
), 0,
12566 getXMMRegLane64( gregOfRexRM(pfx
,modrm
), 0 ));
12567 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12568 nameXMMReg(eregOfRexRM(pfx
,modrm
)));
12571 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12572 storeLE( mkexpr(addr
),
12573 getXMMRegLane64(gregOfRexRM(pfx
,modrm
), 0) );
12574 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12578 goto decode_success
;
12580 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
12582 if (haveF3no66noF2(pfx
) && sz
== 4) {
12583 modrm
= getUChar(delta
);
12584 if (epartIsReg(modrm
)) {
12585 /* fall through, we don't yet have a test case */
12587 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12588 storeLE( mkexpr(addr
),
12589 getXMMRegLane32(gregOfRexRM(pfx
,modrm
), 0) );
12590 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12593 goto decode_success
;
12596 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
12597 if (have66noF2noF3(pfx
)
12598 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12599 modrm
= getUChar(delta
);
12600 if (epartIsReg(modrm
)) {
12601 putXMMReg( eregOfRexRM(pfx
,modrm
),
12602 getXMMReg( gregOfRexRM(pfx
,modrm
) ) );
12603 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12604 nameXMMReg(eregOfRexRM(pfx
,modrm
)));
12607 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12608 storeLE( mkexpr(addr
), getXMMReg(gregOfRexRM(pfx
,modrm
)) );
12609 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12613 goto decode_success
;
12615 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
12616 if (haveNo66noF2noF3(pfx
)
12617 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12618 modrm
= getUChar(delta
);
12619 if (epartIsReg(modrm
)) {
12620 /* fall through; awaiting test case */
12622 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12623 storeLE( mkexpr(addr
), getXMMReg(gregOfRexRM(pfx
,modrm
)) );
12624 DIP("movups %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12627 goto decode_success
;
12633 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
12634 /* Identical to MOVLPS ? */
12635 if (have66noF2noF3(pfx
)
12636 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12637 modrm
= getUChar(delta
);
12638 if (epartIsReg(modrm
)) {
12639 /* fall through; apparently reg-reg is not possible */
12641 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12643 putXMMRegLane64( gregOfRexRM(pfx
,modrm
),
12645 loadLE(Ity_I64
, mkexpr(addr
)) );
12646 DIP("movlpd %s, %s\n",
12647 dis_buf
, nameXMMReg( gregOfRexRM(pfx
,modrm
) ));
12648 goto decode_success
;
12651 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
12652 /* OF 12 = MOVHLPS -- from from hi half to lo half of XMM. */
12653 if (haveNo66noF2noF3(pfx
)
12654 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12655 modrm
= getUChar(delta
);
12656 if (epartIsReg(modrm
)) {
12658 putXMMRegLane64( gregOfRexRM(pfx
,modrm
),
12660 getXMMRegLane64( eregOfRexRM(pfx
,modrm
), 1 ));
12661 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12662 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12664 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12666 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 0/*lower lane*/,
12667 loadLE(Ity_I64
, mkexpr(addr
)) );
12668 DIP("movlps %s, %s\n",
12669 dis_buf
, nameXMMReg( gregOfRexRM(pfx
,modrm
) ));
12671 goto decode_success
;
12676 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
12677 if (haveNo66noF2noF3(pfx
)
12678 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12679 modrm
= getUChar(delta
);
12680 if (!epartIsReg(modrm
)) {
12681 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12683 storeLE( mkexpr(addr
),
12684 getXMMRegLane64( gregOfRexRM(pfx
,modrm
),
12685 0/*lower lane*/ ) );
12686 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx
,modrm
) ),
12688 goto decode_success
;
12690 /* else fall through */
12692 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
12693 /* Identical to MOVLPS ? */
12694 if (have66noF2noF3(pfx
)
12695 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12696 modrm
= getUChar(delta
);
12697 if (!epartIsReg(modrm
)) {
12698 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12700 storeLE( mkexpr(addr
),
12701 getXMMRegLane64( gregOfRexRM(pfx
,modrm
),
12702 0/*lower lane*/ ) );
12703 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx
,modrm
) ),
12705 goto decode_success
;
12707 /* else fall through */
12713 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
12714 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
12715 /* These just appear to be special cases of SHUFPS */
12716 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
12717 Bool hi
= toBool(opc
== 0x15);
12718 IRTemp sV
= newTemp(Ity_V128
);
12719 IRTemp dV
= newTemp(Ity_V128
);
12720 modrm
= getUChar(delta
);
12721 UInt rG
= gregOfRexRM(pfx
,modrm
);
12722 assign( dV
, getXMMReg(rG
) );
12723 if (epartIsReg(modrm
)) {
12724 UInt rE
= eregOfRexRM(pfx
,modrm
);
12725 assign( sV
, getXMMReg(rE
) );
12727 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
12728 nameXMMReg(rE
), nameXMMReg(rG
));
12730 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12731 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12733 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
12734 dis_buf
, nameXMMReg(rG
));
12736 IRTemp res
= math_UNPCKxPS_128( sV
, dV
, hi
);
12737 putXMMReg( rG
, mkexpr(res
) );
12738 goto decode_success
;
12740 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
12741 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
12742 /* These just appear to be special cases of SHUFPS */
12743 if (have66noF2noF3(pfx
)
12744 && sz
== 2 /* could be 8 if rex also present */) {
12745 Bool hi
= toBool(opc
== 0x15);
12746 IRTemp sV
= newTemp(Ity_V128
);
12747 IRTemp dV
= newTemp(Ity_V128
);
12748 modrm
= getUChar(delta
);
12749 UInt rG
= gregOfRexRM(pfx
,modrm
);
12750 assign( dV
, getXMMReg(rG
) );
12751 if (epartIsReg(modrm
)) {
12752 UInt rE
= eregOfRexRM(pfx
,modrm
);
12753 assign( sV
, getXMMReg(rE
) );
12755 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
12756 nameXMMReg(rE
), nameXMMReg(rG
));
12758 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12759 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12761 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
12762 dis_buf
, nameXMMReg(rG
));
12764 IRTemp res
= math_UNPCKxPD_128( sV
, dV
, hi
);
12765 putXMMReg( rG
, mkexpr(res
) );
12766 goto decode_success
;
12771 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
12772 /* These seems identical to MOVHPS. This instruction encoding is
12773 completely crazy. */
12774 if (have66noF2noF3(pfx
)
12775 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12776 modrm
= getUChar(delta
);
12777 if (epartIsReg(modrm
)) {
12778 /* fall through; apparently reg-reg is not possible */
12780 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12782 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 1/*upper lane*/,
12783 loadLE(Ity_I64
, mkexpr(addr
)) );
12784 DIP("movhpd %s,%s\n", dis_buf
,
12785 nameXMMReg( gregOfRexRM(pfx
,modrm
) ));
12786 goto decode_success
;
12789 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
12790 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
12791 if (haveNo66noF2noF3(pfx
)
12792 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12793 modrm
= getUChar(delta
);
12794 if (epartIsReg(modrm
)) {
12796 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 1/*upper lane*/,
12797 getXMMRegLane64( eregOfRexRM(pfx
,modrm
), 0 ) );
12798 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12799 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12801 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12803 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 1/*upper lane*/,
12804 loadLE(Ity_I64
, mkexpr(addr
)) );
12805 DIP("movhps %s,%s\n", dis_buf
,
12806 nameXMMReg( gregOfRexRM(pfx
,modrm
) ));
12808 goto decode_success
;
   /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
   if (haveNo66noF2noF3(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
      modrm = getUChar(delta);
      if (!epartIsReg(modrm)) {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         storeLE( mkexpr(addr),
                  getXMMRegLane64( gregOfRexRM(pfx,modrm),
                                   1/*upper lane*/ ) );
         DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
                               dis_buf);
         goto decode_success;
      }
      /* else fall through */
   }
   /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
   /* Again, this seems identical to MOVHPS. */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
      modrm = getUChar(delta);
      if (!epartIsReg(modrm)) {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         storeLE( mkexpr(addr),
                  getXMMRegLane64( gregOfRexRM(pfx,modrm),
                                   1/*upper lane*/ ) );
         DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
                               dis_buf);
         goto decode_success;
      }
      /* else fall through */
   }
   /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
   /* 0F 18 /1 = PREFETCH0   -- with various different hints */
   /* 0F 18 /2 = PREFETCH1 */
   /* 0F 18 /3 = PREFETCH2 */
   if (haveNo66noF2noF3(pfx)
       && !epartIsReg(getUChar(delta))
       && gregLO3ofRM(getUChar(delta)) >= 0
       && gregLO3ofRM(getUChar(delta)) <= 3) {
      const HChar* hintstr = "??";

      modrm = getUChar(delta);
      vassert(!epartIsReg(modrm));

      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      delta += alen;

      switch (gregLO3ofRM(modrm)) {
         case 0: hintstr = "nta"; break;
         case 1: hintstr = "t0"; break;
         case 2: hintstr = "t1"; break;
         case 3: hintstr = "t2"; break;
         default: vassert(0);
      }

      DIP("prefetch%s %s\n", hintstr, dis_buf);
      goto decode_success;
   }
   /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         putXMMReg( gregOfRexRM(pfx,modrm),
                    getXMMReg( eregOfRexRM(pfx,modrm) ));
         DIP("movapd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                               nameXMMReg(gregOfRexRM(pfx,modrm)));
         delta += 1;
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned( addr );
         putXMMReg( gregOfRexRM(pfx,modrm),
                    loadLE(Ity_V128, mkexpr(addr)) );
         DIP("movapd %s,%s\n", dis_buf,
                               nameXMMReg(gregOfRexRM(pfx,modrm)));
         delta += alen;
      }
      goto decode_success;
   }
   /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
   if (haveNo66noF2noF3(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         putXMMReg( gregOfRexRM(pfx,modrm),
                    getXMMReg( eregOfRexRM(pfx,modrm) ));
         DIP("movaps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                               nameXMMReg(gregOfRexRM(pfx,modrm)));
         delta += 1;
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned( addr );
         putXMMReg( gregOfRexRM(pfx,modrm),
                    loadLE(Ity_V128, mkexpr(addr)) );
         DIP("movaps %s,%s\n", dis_buf,
                               nameXMMReg(gregOfRexRM(pfx,modrm)));
         delta += alen;
      }
      goto decode_success;
   }
   /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
   if (haveNo66noF2noF3(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         putXMMReg( eregOfRexRM(pfx,modrm),
                    getXMMReg( gregOfRexRM(pfx,modrm) ));
         DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                               nameXMMReg(eregOfRexRM(pfx,modrm)));
         delta += 1;
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned( addr );
         storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
         DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                               dis_buf );
         delta += alen;
      }
      goto decode_success;
   }
   /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         putXMMReg( eregOfRexRM(pfx,modrm),
                    getXMMReg( gregOfRexRM(pfx,modrm) ) );
         DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                               nameXMMReg(eregOfRexRM(pfx,modrm)));
         delta += 1;
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned( addr );
         storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
         DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                               dis_buf );
         delta += alen;
      }
      goto decode_success;
   }
   /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
      half xmm */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      IRTemp arg64 = newTemp(Ity_I64);
      IRTemp rmode = newTemp(Ity_I32);

      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         /* Only switch to MMX mode if the source is a MMX register.
            See comments on CVTPI2PD for details.  Fixes #357059. */
         do_MMX_preamble();
         assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
         delta += 1;
         DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
         delta += alen;
         DIP("cvtpi2ps %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRexRM(pfx,modrm)) );
      }

      assign( rmode, get_sse_roundingmode() );

      putXMMRegLane32F(
         gregOfRexRM(pfx,modrm), 0,
         binop(Iop_F64toF32,
               mkexpr(rmode),
               unop(Iop_I32StoF64,
                    unop(Iop_64to32, mkexpr(arg64)) )) );

      putXMMRegLane32F(
         gregOfRexRM(pfx,modrm), 1,
         binop(Iop_F64toF32,
               mkexpr(rmode),
               unop(Iop_I32StoF64,
                    unop(Iop_64HIto32, mkexpr(arg64)) )) );

      goto decode_success;
   }
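   /* Illustrative note: Iop_I32StoF64 is exact, so the only rounding
      happens in the final Iop_F64toF32 step, which is why that step
      carries the SSE rounding mode.  For example the int32 16777217
      (2^24+1) has no exact F32 representation and rounds to 16777216.0
      under round-to-nearest. */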
   /* F3 0F 2A = CVTSI2SS
      -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm
      -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */
   if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
      IRTemp rmode = newTemp(Ity_I32);
      assign( rmode, get_sse_roundingmode() );
      modrm = getUChar(delta);
      if (sz == 4) {
         IRTemp arg32 = newTemp(Ity_I32);
         if (epartIsReg(modrm)) {
            assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
            delta += 1;
            DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
            delta += alen;
            DIP("cvtsi2ss %s,%s\n", dis_buf,
                                    nameXMMReg(gregOfRexRM(pfx,modrm)) );
         }
         putXMMRegLane32F(
            gregOfRexRM(pfx,modrm), 0,
            binop(Iop_F64toF32,
                  mkexpr(rmode),
                  unop(Iop_I32StoF64, mkexpr(arg32)) ) );
      } else {
         /* sz == 8 */
         IRTemp arg64 = newTemp(Ity_I64);
         if (epartIsReg(modrm)) {
            assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
            delta += 1;
            DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
                                     nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("cvtsi2ssq %s,%s\n", dis_buf,
                                     nameXMMReg(gregOfRexRM(pfx,modrm)) );
         }
         putXMMRegLane32F(
            gregOfRexRM(pfx,modrm), 0,
            binop(Iop_F64toF32,
                  mkexpr(rmode),
                  binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) );
      }
      goto decode_success;
   }
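   /* Illustrative note: in the sz==8 case the I64 source can exceed
      F64 precision, so Iop_I64StoF64 itself takes the rounding mode
      before the final F64->F32 narrowing; for example 2^53+1 already
      rounds when converted to double. */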
   /* F2 0F 2A = CVTSI2SD
      when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm
      when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm
   */
   if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
      modrm = getUChar(delta);
      if (sz == 4) {
         IRTemp arg32 = newTemp(Ity_I32);
         if (epartIsReg(modrm)) {
            assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
            delta += 1;
            DIP("cvtsi2sdl %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
                                     nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
            delta += alen;
            DIP("cvtsi2sdl %s,%s\n", dis_buf,
                                     nameXMMReg(gregOfRexRM(pfx,modrm)) );
         }
         putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
                           unop(Iop_I32StoF64, mkexpr(arg32)) );
      } else {
         /* sz == 8 */
         IRTemp arg64 = newTemp(Ity_I64);
         if (epartIsReg(modrm)) {
            assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
            delta += 1;
            DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
                                     nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("cvtsi2sdq %s,%s\n", dis_buf,
                                     nameXMMReg(gregOfRexRM(pfx,modrm)) );
         }
         putXMMRegLane64F(
            gregOfRexRM(pfx,modrm),
            0,
            binop( Iop_I64StoF64,
                   get_sse_roundingmode(),
                   mkexpr(arg64) ) );
      }
      goto decode_success;
   }
   /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
      xmm(G) */
   if (have66noF2noF3(pfx) && sz == 2) {
      IRTemp arg64 = newTemp(Ity_I64);

      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         /* Only switch to MMX mode if the source is a MMX register.
            This is inconsistent with all other instructions which
            convert between XMM and (M64 or MMX), which always switch
            to MMX mode even if 64-bit operand is M64 and not MMX.  At
            least, that's what the Intel docs seem to me to say. */
         do_MMX_preamble();
         assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
         delta += 1;
         DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
         delta += alen;
         DIP("cvtpi2pd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRexRM(pfx,modrm)) );
      }

      putXMMRegLane64F(
         gregOfRexRM(pfx,modrm), 0,
         unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) ) );

      putXMMRegLane64F(
         gregOfRexRM(pfx,modrm), 1,
         unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) ) );

      goto decode_success;
   }
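   /* Illustrative note: no rounding mode is needed here because every
      int32 is exactly representable as an F64, so Iop_I32StoF64 can
      never round. */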
   /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
   /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
   if ( (haveNo66noF2noF3(pfx) && sz == 4)
        || (have66noF2noF3(pfx) && sz == 2) ) {
      modrm = getUChar(delta);
      if (!epartIsReg(modrm)) {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned( addr );
         storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
         DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
                                 dis_buf,
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
         delta += alen;
         goto decode_success;
      }
      /* else fall through */
   }
   /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
      I32 in mmx, according to prevailing SSE rounding mode */
   /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
      I32 in mmx, rounding towards zero */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      IRTemp dst64  = newTemp(Ity_I64);
      IRTemp rmode  = newTemp(Ity_I32);
      IRTemp f32lo  = newTemp(Ity_F32);
      IRTemp f32hi  = newTemp(Ity_F32);
      Bool   r2zero = toBool(opc == 0x2C);

      modrm = getUChar(delta);
      do_MMX_preamble();

      if (epartIsReg(modrm)) {
         delta += 1;
         assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
         assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1));
         DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
                                   nameXMMReg(eregOfRexRM(pfx,modrm)),
                                   nameMMXReg(gregLO3ofRM(modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
         assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64,
                                              mkexpr(addr),
                                              mkU64(4) )));
         delta += alen;
         DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
                                   dis_buf,
                                   nameMMXReg(gregLO3ofRM(modrm)));
      }

      if (r2zero) {
         assign(rmode, mkU32((UInt)Irrm_ZERO) );
      } else {
         assign( rmode, get_sse_roundingmode() );
      }

      assign(
         dst64,
         binop( Iop_32HLto64,
                binop( Iop_F64toI32S,
                       mkexpr(rmode),
                       unop( Iop_F32toF64, mkexpr(f32hi) ) ),
                binop( Iop_F64toI32S,
                       mkexpr(rmode),
                       unop( Iop_F32toF64, mkexpr(f32lo) ) )
              )
      );

      putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
      goto decode_success;
   }
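   /* Illustrative note: the r2zero path forces Irrm_ZERO (truncation)
      while the plain form honours the MXCSR rounding mode.  Under the
      default round-to-nearest mode CVTPS2PI turns -1.5 into -2 (ties
      to even), whereas CVTTPS2PI always truncates it to -1. */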
   /* F3 0F 2D = CVTSS2SI
      when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
                    according to prevailing SSE rounding mode
      when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
                    according to prevailing SSE rounding mode
   */
   /* F3 0F 2C = CVTTSS2SI
      when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
                    truncating towards zero
      when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
                    truncating towards zero
   */
   if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
      delta = dis_CVTxSS2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
      goto decode_success;
   }
   /* F2 0F 2D = CVTSD2SI
      when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
                    according to prevailing SSE rounding mode
      when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
                    according to prevailing SSE rounding mode
   */
   /* F2 0F 2C = CVTTSD2SI
      when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
                    truncating towards zero
      when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
                    truncating towards zero
   */
   if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
      delta = dis_CVTxSD2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
      goto decode_success;
   }
   /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
      I32 in mmx, according to prevailing SSE rounding mode */
   /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
      I32 in mmx, rounding towards zero */
   if (have66noF2noF3(pfx) && sz == 2) {
      IRTemp dst64  = newTemp(Ity_I64);
      IRTemp rmode  = newTemp(Ity_I32);
      IRTemp f64lo  = newTemp(Ity_F64);
      IRTemp f64hi  = newTemp(Ity_F64);
      Bool   r2zero = toBool(opc == 0x2C);

      modrm = getUChar(delta);
      do_MMX_preamble();

      if (epartIsReg(modrm)) {
         delta += 1;
         assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
         assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1));
         DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
                                   nameXMMReg(eregOfRexRM(pfx,modrm)),
                                   nameMMXReg(gregLO3ofRM(modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
         assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64,
                                              mkexpr(addr),
                                              mkU64(8) )));
         delta += alen;
         DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
                                   dis_buf,
                                   nameMMXReg(gregLO3ofRM(modrm)));
      }

      if (r2zero) {
         assign(rmode, mkU32((UInt)Irrm_ZERO) );
      } else {
         assign( rmode, get_sse_roundingmode() );
      }

      assign(
         dst64,
         binop( Iop_32HLto64,
                binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
                binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
              )
      );

      putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
      goto decode_success;
   }
   /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
   /* 66 0F 2F = COMISD  -- 64F0x2 comparison G,E, and set ZCP */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_COMISD( vbi, pfx, delta, False/*!isAvx*/, opc );
      goto decode_success;
   }
   /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
   /* 0F 2F = COMISS  -- 32F0x4 comparison G,E, and set ZCP */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_COMISS( vbi, pfx, delta, False/*!isAvx*/, opc );
      goto decode_success;
   }
   /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
      to 4 lowest bits of ireg(G) */
   if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
       && epartIsReg(getUChar(delta))) {
      /* sz == 8 is a kludge to handle insns with REX.W redundantly
         set to 1, which has been known to happen:

         4c 0f 50 d9             rex64X movmskps %xmm1,%r11d

         20071106: Intel docs say that REX.W isn't redundant: when
         present, a 64-bit register is written; when not present, only
         the 32-bit half is written.  However, testing on a Core2
         machine suggests the entire 64 bit register is written
         irrespective of the status of REX.W.  That could be because
         of the default rule that says "if the lower half of a 32-bit
         register is written, the upper half is zeroed".  By using
         putIReg32 here we inadvertently produce the same behaviour as
         the Core2, for the same reason -- putIReg32 implements said
         rule.

         AMD docs give no indication that REX.W is even valid for this
         insn. */
      delta = dis_MOVMSKPS_128( vbi, pfx, delta, False/*!isAvx*/ );
      goto decode_success;
   }
   /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
      2 lowest bits of ireg(G) */
   if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
      /* sz == 8 is a kludge to handle insns with REX.W redundantly
         set to 1, which has been known to happen:
         66 4c 0f 50 d9          rex64X movmskpd %xmm1,%r11d
         20071106: see further comments on MOVMSKPS implementation above.
      */
      delta = dis_MOVMSKPD_128( vbi, pfx, delta, False/*!isAvx*/ );
      goto decode_success;
   }
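   /* Illustrative note: MOVMSKPS gathers the sign bit of each of the
      four F32 lanes into bits 3:0 of the destination; e.g. lanes
      (high..low) of { -1.0, 2.0, -0.0, 4.0 } would give the mask
      0b1010 = 0xA.  MOVMSKPD does the same for the two F64 lanes. */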
   /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
   if (haveF3no66noF2(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
                                         "sqrtss", Iop_Sqrt32F0x4 );
      goto decode_success;
   }
   /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
                                        "sqrtps", Iop_Sqrt32Fx4 );
      goto decode_success;
   }
   /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
   if (haveF2no66noF3(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta,
                                         "sqrtsd", Iop_Sqrt64F0x2 );
      goto decode_success;
   }
   /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
                                        "sqrtpd", Iop_Sqrt64Fx2 );
      goto decode_success;
   }
   /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
   if (haveF3no66noF2(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
                                         "rsqrtss", Iop_RSqrtEst32F0x4 );
      goto decode_success;
   }
   /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
                                        "rsqrtps", Iop_RSqrtEst32Fx4 );
      goto decode_success;
   }
   /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
   if (haveF3no66noF2(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
                                         "rcpss", Iop_RecipEst32F0x4 );
      goto decode_success;
   }
   /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
                                        "rcpps", Iop_RecipEst32Fx4 );
      goto decode_success;
   }
   /* 0F 54 = ANDPS -- G = G and E */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andps", Iop_AndV128 );
      goto decode_success;
   }
   /* 66 0F 54 = ANDPD -- G = G and E */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andpd", Iop_AndV128 );
      goto decode_success;
   }
   /* 0F 55 = ANDNPS -- G = (not G) and E */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnps",
                                       Iop_AndV128 );
      goto decode_success;
   }
   /* 66 0F 55 = ANDNPD -- G = (not G) and E */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnpd",
                                       Iop_AndV128 );
      goto decode_success;
   }
   /* 0F 56 = ORPS -- G = G or E */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orps", Iop_OrV128 );
      goto decode_success;
   }
   /* 66 0F 56 = ORPD -- G = G or E */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orpd", Iop_OrV128 );
      goto decode_success;
   }
   /* 66 0F 57 = XORPD -- G = G xor E */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorpd", Iop_XorV128 );
      goto decode_success;
   }
   /* 0F 57 = XORPS -- G = G xor E */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorps", Iop_XorV128 );
      goto decode_success;
   }
   /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addps", Iop_Add32Fx4 );
      goto decode_success;
   }
   /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
   if (haveF3no66noF2(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "addss", Iop_Add32F0x4 );
      goto decode_success;
   }
   /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
   if (haveF2no66noF3(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
      delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "addsd", Iop_Add64F0x2 );
      goto decode_success;
   }
   /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addpd", Iop_Add64Fx2 );
      goto decode_success;
   }
   /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
   if (haveF2no66noF3(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
      delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "mulsd", Iop_Mul64F0x2 );
      goto decode_success;
   }
   /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
   if (haveF3no66noF2(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "mulss", Iop_Mul32F0x4 );
      goto decode_success;
   }
   /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulps", Iop_Mul32Fx4 );
      goto decode_success;
   }
   /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulpd", Iop_Mul64Fx2 );
      goto decode_success;
   }
   /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
      F64 in xmm(G). */
   if (haveNo66noF2noF3(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
      delta = dis_CVTPS2PD_128( vbi, pfx, delta, False/*!isAvx*/ );
      goto decode_success;
   }
   /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
      low half xmm(G) */
   if (haveF3no66noF2(pfx) && sz == 4) {
      IRTemp f32lo = newTemp(Ity_F32);

      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         delta += 1;
         assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
         DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
         delta += alen;
         DIP("cvtss2sd %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
      }

      putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
                        unop( Iop_F32toF64, mkexpr(f32lo) ) );

      goto decode_success;
   }
   /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
      low 1/4 xmm(G), according to prevailing SSE rounding mode */
   if (haveF2no66noF3(pfx) && sz == 4) {
      IRTemp rmode = newTemp(Ity_I32);
      IRTemp f64lo = newTemp(Ity_F64);

      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         delta += 1;
         assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
         DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
         delta += alen;
         DIP("cvtsd2ss %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
      }

      assign( rmode, get_sse_roundingmode() );
      putXMMRegLane32F(
         gregOfRexRM(pfx,modrm), 0,
         binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) ) );

      goto decode_success;
   }
   /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
      lo half xmm(G), rounding according to prevailing SSE rounding
      mode, and zero upper half */
   /* Note, this is practically identical to CVTPD2DQ.  It would have
      been nice to merge them together. */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_CVTPD2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
      goto decode_success;
   }
   /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
      xmm(G), rounding towards zero */
   /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
      xmm(G), as per the prevailing rounding mode */
   if ( (have66noF2noF3(pfx) && sz == 2)
        || (haveF3no66noF2(pfx) && sz == 4) ) {
      Bool r2zero = toBool(sz == 4); // FIXME -- unreliable (???)
      delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, False/*!isAvx*/, r2zero );
      goto decode_success;
   }
   /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
      xmm(G) */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_CVTDQ2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
      goto decode_success;
   }
   /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
   if (haveF3no66noF2(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "subss", Iop_Sub32F0x4 );
      goto decode_success;
   }
   /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
   if (haveF2no66noF3(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
      delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "subsd", Iop_Sub64F0x2 );
      goto decode_success;
   }
   /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subps", Iop_Sub32Fx4 );
      goto decode_success;
   }
   /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subpd", Iop_Sub64Fx2 );
      goto decode_success;
   }
   /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minps", Iop_Min32Fx4 );
      goto decode_success;
   }
   /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
   if (haveF3no66noF2(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "minss", Iop_Min32F0x4 );
      goto decode_success;
   }
   /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
   if (haveF2no66noF3(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
      delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "minsd", Iop_Min64F0x2 );
      goto decode_success;
   }
   /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minpd", Iop_Min64Fx2 );
      goto decode_success;
   }
   /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
   if (haveF2no66noF3(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "divsd", Iop_Div64F0x2 );
      goto decode_success;
   }
   /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divps", Iop_Div32Fx4 );
      goto decode_success;
   }
   /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
   if (haveF3no66noF2(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "divss", Iop_Div32F0x4 );
      goto decode_success;
   }
   /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divpd", Iop_Div64Fx2 );
      goto decode_success;
   }
   /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxps", Iop_Max32Fx4 );
      goto decode_success;
   }
   /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
   if (haveF3no66noF2(pfx) && sz == 4) {
      delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "maxss", Iop_Max32F0x4 );
      goto decode_success;
   }
   /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
   if (haveF2no66noF3(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
      delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "maxsd", Iop_Max64F0x2 );
      goto decode_success;
   }
   /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxpd", Iop_Max64Fx2 );
      goto decode_success;
   }
   /* 66 0F 60 = PUNPCKLBW */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "punpcklbw",
                                 Iop_InterleaveLO8x16, True );
      goto decode_success;
   }
   /* 66 0F 61 = PUNPCKLWD */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "punpcklwd",
                                 Iop_InterleaveLO16x8, True );
      goto decode_success;
   }
   /* 66 0F 62 = PUNPCKLDQ */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "punpckldq",
                                 Iop_InterleaveLO32x4, True );
      goto decode_success;
   }
   /* 66 0F 63 = PACKSSWB */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "packsswb",
                                 Iop_QNarrowBin16Sto8Sx16, True );
      goto decode_success;
   }
   /* 66 0F 64 = PCMPGTB */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "pcmpgtb", Iop_CmpGT8Sx16, False );
      goto decode_success;
   }
   /* 66 0F 65 = PCMPGTW */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "pcmpgtw", Iop_CmpGT16Sx8, False );
      goto decode_success;
   }
   /* 66 0F 66 = PCMPGTD */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "pcmpgtd", Iop_CmpGT32Sx4, False );
      goto decode_success;
   }
   /* 66 0F 67 = PACKUSWB */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "packuswb",
                                 Iop_QNarrowBin16Sto8Ux16, True );
      goto decode_success;
   }
   /* 66 0F 68 = PUNPCKHBW */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "punpckhbw",
                                 Iop_InterleaveHI8x16, True );
      goto decode_success;
   }
   /* 66 0F 69 = PUNPCKHWD */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "punpckhwd",
                                 Iop_InterleaveHI16x8, True );
      goto decode_success;
   }
   /* 66 0F 6A = PUNPCKHDQ */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "punpckhdq",
                                 Iop_InterleaveHI32x4, True );
      goto decode_success;
   }
   /* 66 0F 6B = PACKSSDW */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "packssdw",
                                 Iop_QNarrowBin32Sto16Sx8, True );
      goto decode_success;
   }
   /* 66 0F 6C = PUNPCKLQDQ */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "punpcklqdq",
                                 Iop_InterleaveLO64x2, True );
      goto decode_success;
   }
   /* 66 0F 6D = PUNPCKHQDQ */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "punpckhqdq",
                                 Iop_InterleaveHI64x2, True );
      goto decode_success;
   }
   /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4,
                 zeroing high 3/4 of xmm. */
   /*              or from ireg64/m64 to xmm lo 1/2,
                 zeroing high 1/2 of xmm. */
   if (have66noF2noF3(pfx)) {
      vassert(sz == 2 || sz == 8);
      if (sz == 2) sz = 4;
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         delta += 1;
         if (sz == 4) {
            putXMMReg(
               gregOfRexRM(pfx,modrm),
               unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) ) );
            DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            putXMMReg(
               gregOfRexRM(pfx,modrm),
               unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) ) );
            DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
         }
      } else {
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         putXMMReg(
            gregOfRexRM(pfx,modrm),
            sz == 4
               ?  unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
               :  unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) ) );
         DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf,
                               nameXMMReg(gregOfRexRM(pfx,modrm)));
      }
      goto decode_success;
   }
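   /* Illustrative note: Iop_32UtoV128 / Iop_64UtoV128 zero-extend the
      scalar into the full 128-bit value, which is what gives MOVD and
      MOVQ their "zero the remaining lanes" behaviour; e.g. after
      movd %eax,%xmm0 the upper 96 bits of %xmm0 are zero. */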
   if (have66noF2noF3(pfx)
       && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
      /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         putXMMReg( gregOfRexRM(pfx,modrm),
                    getXMMReg( eregOfRexRM(pfx,modrm) ));
         DIP("movdqa %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                               nameXMMReg(gregOfRexRM(pfx,modrm)));
         delta += 1;
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned( addr );
         putXMMReg( gregOfRexRM(pfx,modrm),
                    loadLE(Ity_V128, mkexpr(addr)) );
         DIP("movdqa %s,%s\n", dis_buf,
                               nameXMMReg(gregOfRexRM(pfx,modrm)));
         delta += alen;
      }
      goto decode_success;
   }
   if (haveF3no66noF2(pfx) && sz == 4) {
      /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         putXMMReg( gregOfRexRM(pfx,modrm),
                    getXMMReg( eregOfRexRM(pfx,modrm) ));
         DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                               nameXMMReg(gregOfRexRM(pfx,modrm)));
         delta += 1;
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         putXMMReg( gregOfRexRM(pfx,modrm),
                    loadLE(Ity_V128, mkexpr(addr)) );
         DIP("movdqu %s,%s\n", dis_buf,
                               nameXMMReg(gregOfRexRM(pfx,modrm)));
         delta += alen;
      }
      goto decode_success;
   }
   /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_PSHUFD_32x4( vbi, pfx, delta, False/*!writesYmm*/);
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      Int order;
      IRTemp sV, dV, s3, s2, s1, s0;
      s3 = s2 = s1 = s0 = IRTemp_INVALID;
      sV = newTemp(Ity_I64);
      dV = newTemp(Ity_I64);
      do_MMX_preamble();
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
         order = (Int)getUChar(delta+1);
         delta += 1+1;
         DIP("pshufw $%d,%s,%s\n", order,
                                   nameMMXReg(eregLO3ofRM(modrm)),
                                   nameMMXReg(gregLO3ofRM(modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
                           1/*extra byte after amode*/ );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         order = (Int)getUChar(delta+alen);
         delta += 1+alen;
         DIP("pshufw $%d,%s,%s\n", order, dis_buf,
                                   nameMMXReg(gregLO3ofRM(modrm)));
      }
      breakup64to16s( sV, &s3, &s2, &s1, &s0 );
#     define SEL(n) \
         ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
      assign(dV,
             mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
                          SEL((order>>2)&3), SEL((order>>0)&3) ) );
      putMMXReg(gregLO3ofRM(modrm), mkexpr(dV));
#     undef SEL
      goto decode_success;
   }
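   /* Illustrative note: with the SEL macro above, an order byte of
      0x1B (binary 00 01 10 11) picks s0 for the top lane down to s3
      for the bottom lane, i.e. it reverses the four 16-bit lanes,
      while 0xE4 (11 10 01 00) is the identity shuffle. */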
   /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
      mem) to G(xmm), and copy upper half */
   if (haveF2no66noF3(pfx) && sz == 4) {
      delta = dis_PSHUFxW_128( vbi, pfx, delta,
                               False/*!isAvx*/, False/*!xIsH*/ );
      goto decode_success;
   }
   /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
      mem) to G(xmm), and copy lower half */
   if (haveF3no66noF2(pfx) && sz == 4) {
      delta = dis_PSHUFxW_128( vbi, pfx, delta,
                               False/*!isAvx*/, True/*xIsH*/ );
      goto decode_success;
   }
   /* 66 0F 71 /2 ib = PSRLW by immediate */
   if (have66noF2noF3(pfx) && sz == 2
       && epartIsReg(getUChar(delta))
       && gregLO3ofRM(getUChar(delta)) == 2) {
      delta = dis_SSE_shiftE_imm( pfx, delta, "psrlw", Iop_ShrN16x8 );
      goto decode_success;
   }
   /* 66 0F 71 /4 ib = PSRAW by immediate */
   if (have66noF2noF3(pfx) && sz == 2
       && epartIsReg(getUChar(delta))
       && gregLO3ofRM(getUChar(delta)) == 4) {
      delta = dis_SSE_shiftE_imm( pfx, delta, "psraw", Iop_SarN16x8 );
      goto decode_success;
   }
   /* 66 0F 71 /6 ib = PSLLW by immediate */
   if (have66noF2noF3(pfx) && sz == 2
       && epartIsReg(getUChar(delta))
       && gregLO3ofRM(getUChar(delta)) == 6) {
      delta = dis_SSE_shiftE_imm( pfx, delta, "psllw", Iop_ShlN16x8 );
      goto decode_success;
   }
   /* 66 0F 72 /2 ib = PSRLD by immediate */
   if (have66noF2noF3(pfx) && sz == 2
       && epartIsReg(getUChar(delta))
       && gregLO3ofRM(getUChar(delta)) == 2) {
      delta = dis_SSE_shiftE_imm( pfx, delta, "psrld", Iop_ShrN32x4 );
      goto decode_success;
   }
   /* 66 0F 72 /4 ib = PSRAD by immediate */
   if (have66noF2noF3(pfx) && sz == 2
       && epartIsReg(getUChar(delta))
       && gregLO3ofRM(getUChar(delta)) == 4) {
      delta = dis_SSE_shiftE_imm( pfx, delta, "psrad", Iop_SarN32x4 );
      goto decode_success;
   }
   /* 66 0F 72 /6 ib = PSLLD by immediate */
   if (have66noF2noF3(pfx) && sz == 2
       && epartIsReg(getUChar(delta))
       && gregLO3ofRM(getUChar(delta)) == 6) {
      delta = dis_SSE_shiftE_imm( pfx, delta, "pslld", Iop_ShlN32x4 );
      goto decode_success;
   }
   /* 66 0F 73 /3 ib = PSRLDQ by immediate */
   /* note, if mem case ever filled in, 1 byte after amode */
   if (have66noF2noF3(pfx) && sz == 2
       && epartIsReg(getUChar(delta))
       && gregLO3ofRM(getUChar(delta)) == 3) {
      Int imm = (Int)getUChar(delta+1);
      Int reg = eregOfRexRM(pfx,getUChar(delta));
      DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
      delta += 2;
      IRTemp sV = newTemp(Ity_V128);
      assign( sV, getXMMReg(reg) );
      putXMMReg(reg, mkexpr(math_PSRLDQ( sV, imm )));
      goto decode_success;
   }
   /* 66 0F 73 /7 ib = PSLLDQ by immediate */
   /* note, if mem case ever filled in, 1 byte after amode */
   if (have66noF2noF3(pfx) && sz == 2
       && epartIsReg(getUChar(delta))
       && gregLO3ofRM(getUChar(delta)) == 7) {
      Int imm = (Int)getUChar(delta+1);
      Int reg = eregOfRexRM(pfx,getUChar(delta));
      DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
      vassert(imm >= 0 && imm <= 255);
      delta += 2;
      IRTemp sV = newTemp(Ity_V128);
      assign( sV, getXMMReg(reg) );
      putXMMReg(reg, mkexpr(math_PSLLDQ( sV, imm )));
      goto decode_success;
   }
   /* 66 0F 73 /2 ib = PSRLQ by immediate */
   if (have66noF2noF3(pfx) && sz == 2
       && epartIsReg(getUChar(delta))
       && gregLO3ofRM(getUChar(delta)) == 2) {
      delta = dis_SSE_shiftE_imm( pfx, delta, "psrlq", Iop_ShrN64x2 );
      goto decode_success;
   }
   /* 66 0F 73 /6 ib = PSLLQ by immediate */
   if (have66noF2noF3(pfx) && sz == 2
       && epartIsReg(getUChar(delta))
       && gregLO3ofRM(getUChar(delta)) == 6) {
      delta = dis_SSE_shiftE_imm( pfx, delta, "psllq", Iop_ShlN64x2 );
      goto decode_success;
   }
   /* 66 0F 74 = PCMPEQB */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "pcmpeqb", Iop_CmpEQ8x16, False );
      goto decode_success;
   }
   /* 66 0F 75 = PCMPEQW */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "pcmpeqw", Iop_CmpEQ16x8, False );
      goto decode_success;
   }
   /* 66 0F 76 = PCMPEQD */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "pcmpeqd", Iop_CmpEQ32x4, False );
      goto decode_success;
   }
   /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
      G (lo half xmm).  Upper half of G is zeroed out. */
   if (haveF3no66noF2(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
                          getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
         /* zero bits 127:64 */
         putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) );
         DIP("movq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                             nameXMMReg(gregOfRexRM(pfx,modrm)));
         delta += 1;
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
         putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
                          loadLE(Ity_I64, mkexpr(addr)) );
         DIP("movq %s,%s\n", dis_buf,
                             nameXMMReg(gregOfRexRM(pfx,modrm)));
         delta += alen;
      }
      goto decode_success;
   }
   /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */
   /*              or from xmm low 1/2 to ireg64 or m64. */
   if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
      if (sz == 2) sz = 4;
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         delta += 1;
         if (sz == 4) {
            putIReg32( eregOfRexRM(pfx,modrm),
                       getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
            DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                 nameIReg32(eregOfRexRM(pfx,modrm)));
         } else {
            putIReg64( eregOfRexRM(pfx,modrm),
                       getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
            DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                 nameIReg64(eregOfRexRM(pfx,modrm)));
         }
      } else {
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         storeLE( mkexpr(addr),
                  sz == 4
                     ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0)
                     : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) );
         DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q',
             nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
      }
      goto decode_success;
   }
   /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
   if (haveF3no66noF2(pfx) && sz == 4) {
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         goto decode_failure; /* awaiting test case */
         delta += 1;
         putXMMReg( eregOfRexRM(pfx,modrm),
                    getXMMReg(gregOfRexRM(pfx,modrm)) );
         DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                nameXMMReg(eregOfRexRM(pfx,modrm)));
      } else {
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
         DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
      }
      goto decode_success;
   }
   /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
   if (have66noF2noF3(pfx) && sz == 2) {
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         delta += 1;
         putXMMReg( eregOfRexRM(pfx,modrm),
                    getXMMReg(gregOfRexRM(pfx,modrm)) );
         DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
                                nameXMMReg(eregOfRexRM(pfx,modrm)));
      } else {
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned( addr );
         delta += alen;
         storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
         DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
      }
      goto decode_success;
   }
   /* 0F AE /7 = SFENCE -- flush pending operations to memory */
   if (haveNo66noF2noF3(pfx)
       && epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
       && sz == 4) {
      delta += 1;
      /* Insert a memory fence.  It's sometimes important that these
         are carried through to the generated code. */
      stmt( IRStmt_MBE(Imbe_Fence) );
      DIP("sfence\n");
      goto decode_success;
   }
   /* mindless duplication follows .. */
   /* 0F AE /5 = LFENCE -- flush pending operations to memory */
   /* 0F AE /6 = MFENCE -- flush pending operations to memory */
   if (haveNo66noF2noF3(pfx)
       && epartIsReg(getUChar(delta))
       && (gregLO3ofRM(getUChar(delta)) == 5
           || gregLO3ofRM(getUChar(delta)) == 6)
       && sz == 4) {
      delta += 1;
      /* Insert a memory fence.  It's sometimes important that these
         are carried through to the generated code. */
      stmt( IRStmt_MBE(Imbe_Fence) );
      DIP("%sfence\n", gregLO3ofRM(getUChar(delta-1))==5 ? "l" : "m");
      goto decode_success;
   }
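   /* Illustrative note: SFENCE, LFENCE and MFENCE are all modelled by
      the same IRStmt_MBE(Imbe_Fence) statement, so the IR does not
      distinguish store-only, load-only and full fences; presumably a
      full fence is a safe over-approximation for all three. */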
   /* 0F AE /7 = CLFLUSH -- flush cache line */
   if (haveNo66noF2noF3(pfx)
       && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
       && sz == 4) {

      /* This is something of a hack.  We need to know the size of
         the cache line containing addr.  Since we don't (easily),
         assume 256 on the basis that no real cache would have a
         line that big.  It's safe to invalidate more stuff than we
         need, just inefficient. */
      ULong lineszB = 256ULL;

      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      delta += alen;

      /* Round addr down to the start of the containing block. */
      stmt( IRStmt_Put(
               OFFB_CMSTART,
               binop( Iop_And64,
                      mkexpr(addr),
                      mkU64( ~(lineszB-1) ))) );

      stmt( IRStmt_Put(OFFB_CMLEN, mkU64(lineszB) ) );

      jmp_lit(dres, Ijk_InvalICache, (Addr64)(guest_RIP_bbstart+delta));

      DIP("clflush %s\n", dis_buf);
      goto decode_success;
   }
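   /* Illustrative note: with lineszB = 256 the mask ~(lineszB-1)
      rounds the address down to a 256-byte boundary, e.g. an addr of
      0x1234 gives a block start of 0x1200 and a length of 256.  Since
      real cache lines are smaller than that and naturally aligned,
      the invalidated range always covers the line actually flushed. */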
   /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
   if (haveNo66noF2noF3(pfx)
       && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
       && sz == 4) {
      delta = dis_STMXCSR(vbi, pfx, delta, False/*!isAvx*/);
      goto decode_success;
   }
   /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
   if (haveNo66noF2noF3(pfx)
       && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
       && sz == 4) {
      delta = dis_LDMXCSR(vbi, pfx, delta, False/*!isAvx*/);
      goto decode_success;
   }
   /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
   if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
       && !epartIsReg(getUChar(delta))
       && gregOfRexRM(pfx,getUChar(delta)) == 0) {
      delta = dis_FXSAVE(vbi, pfx, delta, sz);
      goto decode_success;
   }
   /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
   if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
       && !epartIsReg(getUChar(delta))
       && gregOfRexRM(pfx,getUChar(delta)) == 1) {
      delta = dis_FXRSTOR(vbi, pfx, delta, sz);
      goto decode_success;
   }
   /* 0F AE /4 = XSAVE mem -- write x87, SSE, AVX state to memory */
   if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
       && !epartIsReg(getUChar(delta))
       && gregOfRexRM(pfx,getUChar(delta)) == 4
       && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
      delta = dis_XSAVE(vbi, pfx, delta, sz);
      goto decode_success;
   }
   /* 0F AE /5 = XRSTOR mem -- read x87, SSE, AVX state from memory */
   if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
       && !epartIsReg(getUChar(delta))
       && gregOfRexRM(pfx,getUChar(delta)) == 5
       && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
      delta = dis_XRSTOR(vbi, pfx, delta, sz);
      goto decode_success;
   }
   /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      Long delta0 = delta;
      delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpps", True, 4 );
      if (delta > delta0) goto decode_success;
   }
   /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
   if (haveF3no66noF2(pfx) && sz == 4) {
      Long delta0 = delta;
      delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpss", False, 4 );
      if (delta > delta0) goto decode_success;
   }
   /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
   if (haveF2no66noF3(pfx) && sz == 4) {
      Long delta0 = delta;
      delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpsd", False, 8 );
      if (delta > delta0) goto decode_success;
   }
   /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
   if (have66noF2noF3(pfx) && sz == 2) {
      Long delta0 = delta;
      delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmppd", True, 8 );
      if (delta > delta0) goto decode_success;
   }
   /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
   if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
      modrm = getUChar(delta);
      if (!epartIsReg(modrm)) {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) );
         DIP("movnti %s,%s\n", dis_buf,
                               nameIRegG(sz, pfx, modrm));
         delta += alen;
         goto decode_success;
      }
      /* else fall through */
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
      put it into the specified lane of mmx(G). */
   if (haveNo66noF2noF3(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
      /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
         mmx reg.  t4 is the new lane value.  t5 is the original
         mmx value. t6 is the new mmx value. */
      Int lane;
      t4 = newTemp(Ity_I16);
      t5 = newTemp(Ity_I64);
      t6 = newTemp(Ity_I64);
      modrm = getUChar(delta);
      do_MMX_preamble();

      assign(t5, getMMXReg(gregLO3ofRM(modrm)));
      breakup64to16s( t5, &t3, &t2, &t1, &t0 );

      if (epartIsReg(modrm)) {
         assign(t4, getIReg16(eregOfRexRM(pfx,modrm)));
         delta += 1+1;
         lane = getUChar(delta-1);
         DIP("pinsrw $%d,%s,%s\n", lane,
                                   nameIReg16(eregOfRexRM(pfx,modrm)),
                                   nameMMXReg(gregLO3ofRM(modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
         delta += 1+alen;
         lane = getUChar(delta-1);
         assign(t4, loadLE(Ity_I16, mkexpr(addr)));
         DIP("pinsrw $%d,%s,%s\n", lane,
                                   dis_buf,
                                   nameMMXReg(gregLO3ofRM(modrm)));
      }

      switch (lane & 3) {
         case 0:  assign(t6, mk64from16s(t3,t2,t1,t4)); break;
         case 1:  assign(t6, mk64from16s(t3,t2,t4,t0)); break;
         case 2:  assign(t6, mk64from16s(t3,t4,t1,t0)); break;
         case 3:  assign(t6, mk64from16s(t4,t2,t1,t0)); break;
         default: vassert(0);
      }
      putMMXReg(gregLO3ofRM(modrm), mkexpr(t6));
      goto decode_success;
   }
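   /* Illustrative note: only (lane & 3) matters because an MMX
      register holds four 16-bit lanes; e.g. with lane == 2 the new
      value t4 replaces the third-lowest 16 bits and the other three
      lanes are passed through unchanged. */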
   /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
      put it into the specified lane of xmm(G). */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
      Int lane;
      t4 = newTemp(Ity_I16);
      modrm = getUChar(delta);
      UInt rG = gregOfRexRM(pfx,modrm);
      if (epartIsReg(modrm)) {
         UInt rE = eregOfRexRM(pfx,modrm);
         assign(t4, getIReg16(rE));
         delta += 1+1;
         lane = getUChar(delta-1);
         DIP("pinsrw $%d,%s,%s\n",
             lane, nameIReg16(rE), nameXMMReg(rG));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
                           1/*byte after the amode*/ );
         delta += 1+alen;
         lane = getUChar(delta-1);
         assign(t4, loadLE(Ity_I16, mkexpr(addr)));
         DIP("pinsrw $%d,%s,%s\n",
             lane, dis_buf, nameXMMReg(rG));
      }
      IRTemp src_vec = newTemp(Ity_V128);
      assign(src_vec, getXMMReg(rG));
      IRTemp res_vec = math_PINSRW_128( src_vec, t4, lane & 7);
      putXMMReg(rG, mkexpr(res_vec));
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
      zero-extend of it in ireg(G). */
   if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         IRTemp sV = newTemp(Ity_I64);
         t5 = newTemp(Ity_I16);
         do_MMX_preamble();
         assign(sV, getMMXReg(eregLO3ofRM(modrm)));
         breakup64to16s( sV, &t3, &t2, &t1, &t0 );
         switch (getUChar(delta+1) & 3) {
            case 0:  assign(t5, mkexpr(t0)); break;
            case 1:  assign(t5, mkexpr(t1)); break;
            case 2:  assign(t5, mkexpr(t2)); break;
            case 3:  assign(t5, mkexpr(t3)); break;
            default: vassert(0);
         }
         if (sz == 8)
            putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5)));
         else
            putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5)));
         DIP("pextrw $%d,%s,%s\n",
             (Int)getUChar(delta+1),
             nameMMXReg(eregLO3ofRM(modrm)),
             sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm))
                   : nameIReg32(gregOfRexRM(pfx,modrm))
         );
         delta += 2;
         goto decode_success;
      }
      /* else fall through */
      /* note, for anyone filling in the mem case: this insn has one
         byte after the amode and therefore you must pass 1 as the
         last arg to disAMode */
   }
   /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
      zero-extend of it in ireg(G). */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
      Long delta0 = delta;
      delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
                                           False/*!isAvx*/ );
      if (delta > delta0) goto decode_success;
      /* else fall through -- decoding has failed */
   }
   /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      Int    imm8 = 0;
      IRTemp sV   = newTemp(Ity_V128);
      IRTemp dV   = newTemp(Ity_V128);
      modrm = getUChar(delta);
      UInt rG = gregOfRexRM(pfx,modrm);
      assign( dV, getXMMReg(rG) );
      if (epartIsReg(modrm)) {
         UInt rE = eregOfRexRM(pfx,modrm);
         assign( sV, getXMMReg(rE) );
         imm8 = (Int)getUChar(delta+1);
         delta += 1+1;
         DIP("shufps $%d,%s,%s\n", imm8, nameXMMReg(rE), nameXMMReg(rG));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         imm8 = (Int)getUChar(delta+alen);
         delta += 1+alen;
         DIP("shufps $%d,%s,%s\n", imm8, dis_buf, nameXMMReg(rG));
      }
      IRTemp res = math_SHUFPS_128( sV, dV, imm8 );
      putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
      goto decode_success;
   }
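   /* Illustrative note: SHUFPS consumes imm8 two bits at a time.  A
      plain-C sketch of the selection, with d[] and s[] holding the
      four F32 lanes of dV and sV:

         res[0] = d[(imm8 >> 0) & 3];
         res[1] = d[(imm8 >> 2) & 3];
         res[2] = s[(imm8 >> 4) & 3];
         res[3] = s[(imm8 >> 6) & 3];

      math_SHUFPS_128 is assumed to implement this selection. */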
   /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
   if (have66noF2noF3(pfx) && sz == 2) {
      Int    select;
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);

      modrm = getUChar(delta);
      assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
         select = (Int)getUChar(delta+1);
         delta += 1+1;
         DIP("shufpd $%d,%s,%s\n", select,
                                   nameXMMReg(eregOfRexRM(pfx,modrm)),
                                   nameXMMReg(gregOfRexRM(pfx,modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         select = getUChar(delta+alen);
         delta += 1+alen;
         DIP("shufpd $%d,%s,%s\n", select,
                                   dis_buf,
                                   nameXMMReg(gregOfRexRM(pfx,modrm)));
      }

      IRTemp res = math_SHUFPD_128( sV, dV, select );
      putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
      goto decode_success;
   }
   /* 66 0F D1 = PSRLW by E */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlw", Iop_ShrN16x8 );
      goto decode_success;
   }
   /* 66 0F D2 = PSRLD by E */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrld", Iop_ShrN32x4 );
      goto decode_success;
   }
   /* 66 0F D3 = PSRLQ by E */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlq", Iop_ShrN64x2 );
      goto decode_success;
   }
   /* 66 0F D4 = PADDQ */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "paddq", Iop_Add64x2, False );
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
   /* 0F D4 = PADDQ -- add 64x1 */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      do_MMX_preamble();
      delta = dis_MMXop_regmem_to_reg (
                 vbi, pfx, delta, opc, "paddq", False );
      goto decode_success;
   }
   /* 66 0F D5 = PMULLW -- 16x8 multiply */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "pmullw", Iop_Mul16x8, False );
      goto decode_success;
   }
   /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
      hi half). */
   if (haveF3no66noF2(pfx) && sz == 4) {
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         do_MMX_preamble();
         putXMMReg( gregOfRexRM(pfx,modrm),
                    unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) );
         DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                nameXMMReg(gregOfRexRM(pfx,modrm)));
         delta += 1;
         goto decode_success;
      }
      /* apparently no mem case for this insn */
   }
   /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
      or lo half xmm). */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         /* fall through, awaiting test case */
         /* dst: lo half copied, hi half zeroed */
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         storeLE( mkexpr(addr),
                  getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
         DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf );
         delta += alen;
         goto decode_success;
      }
   }
   /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
   if (haveF2no66noF3(pfx) && sz == 4) {
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         do_MMX_preamble();
         putMMXReg( gregLO3ofRM(modrm),
                    getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
         DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                nameMMXReg(gregLO3ofRM(modrm)));
         delta += 1;
         goto decode_success;
      }
      /* apparently no mem case for this insn */
   }
   /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16
      lanes in xmm(E), turn them into a byte, and put
      zero-extend of it in ireg(G).  Doing this directly is just
      too cumbersome; give up therefore and call a helper. */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
       && epartIsReg(getUChar(delta))) { /* no memory case, it seems */
      delta = dis_PMOVMSKB_128( vbi, pfx, delta, False/*!isAvx*/ );
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
      mmx(E), turn them into a byte, and put zero-extend of it in
      ireg(G). */
   if (haveNo66noF2noF3(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         t0 = newTemp(Ity_I64);
         t1 = newTemp(Ity_I32);
         assign(t0, getMMXReg(eregLO3ofRM(modrm)));
         assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
         putIReg32(gregOfRexRM(pfx,modrm), mkexpr(t1));
         DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                 nameIReg32(gregOfRexRM(pfx,modrm)));
         goto decode_success;
      }
      /* else fall through */
   }
   /* 66 0F D8 = PSUBUSB */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "psubusb", Iop_QSub8Ux16, False );
      goto decode_success;
   }

   /* 66 0F D9 = PSUBUSW */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "psubusw", Iop_QSub16Ux8, False );
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F DA = PMINUB -- 8x8 unsigned min */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_MMXop_regmem_to_reg (
                 vbi, pfx, delta, opc, "pminub", False );
      goto decode_success;
   }

   /* 66 0F DA = PMINUB -- 8x16 unsigned min */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "pminub", Iop_Min8Ux16, False );
      goto decode_success;
   }
   /* 66 0F DB = PAND */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pand", Iop_AndV128 );
      goto decode_success;
   }

   /* 66 0F DC = PADDUSB */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "paddusb", Iop_QAdd8Ux16, False );
      goto decode_success;
   }

   /* 66 0F DD = PADDUSW */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "paddusw", Iop_QAdd16Ux8, False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F DE = PMAXUB -- 8x8 unsigned max */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_MMXop_regmem_to_reg (
                 vbi, pfx, delta, opc, "pmaxub", False );
      goto decode_success;
   }

   /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "pmaxub", Iop_Max8Ux16, False );
      goto decode_success;
   }

   /* 66 0F DF = PANDN */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "pandn", Iop_AndV128 );
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_MMXop_regmem_to_reg (
                 vbi, pfx, delta, opc, "pavgb", False );
      goto decode_success;
   }

   /* 66 0F E0 = PAVGB */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "pavgb", Iop_Avg8Ux16, False );
      goto decode_success;
   }

   /* 66 0F E1 = PSRAW by E */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psraw", Iop_SarN16x8 );
      goto decode_success;
   }

   /* 66 0F E2 = PSRAD by E */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrad", Iop_SarN32x4 );
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_MMXop_regmem_to_reg (
                 vbi, pfx, delta, opc, "pavgw", False );
      goto decode_success;
   }

   /* 66 0F E3 = PAVGW */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "pavgw", Iop_Avg16Ux8, False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_MMXop_regmem_to_reg (
                 vbi, pfx, delta, opc, "pmuluh", False );
      goto decode_success;
   }

   /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "pmulhuw", Iop_MulHi16Ux8, False );
      goto decode_success;
   }

   /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "pmulhw", Iop_MulHi16Sx8, False );
      goto decode_success;
   }
   /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
      lo half xmm(G), and zero upper half, rounding towards zero */
   /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
      lo half xmm(G), according to prevailing rounding mode, and zero
      upper half */
   if ( (haveF2no66noF3(pfx) && sz == 4)
        || (have66noF2noF3(pfx) && sz == 2) ) {
      delta = dis_CVTxPD2DQ_128( vbi, pfx, delta, False/*!isAvx*/,
                                 toBool(sz == 2)/*r2zero*/);
      goto decode_success;
   }

   /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
      F64 in xmm(G) */
   if (haveF3no66noF2(pfx) && sz == 4) {
      delta = dis_CVTDQ2PD_128(vbi, pfx, delta, False/*!isAvx*/);
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F E7 = MOVNTQ -- for us, just a plain MMX store.  Note, the
      Intel manual does not say anything about the usual business of
      the FP reg tags getting trashed whenever an MMX insn happens.
      So we just leave them alone. */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      modrm = getUChar(delta);
      if (!epartIsReg(modrm)) {
         /* do_MMX_preamble(); Intel docs don't specify this */
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
         DIP("movntq %s,%s\n", dis_buf,
                               nameMMXReg(gregLO3ofRM(modrm)));
         goto decode_success;
      }
      /* else fall through */
   }
   /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
   if (have66noF2noF3(pfx) && sz == 2) {
      modrm = getUChar(delta);
      if (!epartIsReg(modrm)) {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned( addr );
         storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
         DIP("movntdq %s,%s\n", dis_buf,
                                nameXMMReg(gregOfRexRM(pfx,modrm)));
         goto decode_success;
      }
      /* else fall through */
   }
   /* 66 0F E8 = PSUBSB */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "psubsb", Iop_QSub8Sx16, False );
      goto decode_success;
   }

   /* 66 0F E9 = PSUBSW */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "psubsw", Iop_QSub16Sx8, False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F EA = PMINSW -- 16x4 signed min */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_MMXop_regmem_to_reg (
                 vbi, pfx, delta, opc, "pminsw", False );
      goto decode_success;
   }

   /* 66 0F EA = PMINSW -- 16x8 signed min */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "pminsw", Iop_Min16Sx8, False );
      goto decode_success;
   }
   /* 66 0F EB = POR */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "por", Iop_OrV128 );
      goto decode_success;
   }

   /* 66 0F EC = PADDSB */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "paddsb", Iop_QAdd8Sx16, False );
      goto decode_success;
   }

   /* 66 0F ED = PADDSW */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "paddsw", Iop_QAdd16Sx8, False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F EE = PMAXSW -- 16x4 signed max */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_MMXop_regmem_to_reg (
                 vbi, pfx, delta, opc, "pmaxsw", False );
      goto decode_success;
   }

   /* 66 0F EE = PMAXSW -- 16x8 signed max */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "pmaxsw", Iop_Max16Sx8, False );
      goto decode_success;
   }

   /* 66 0F EF = PXOR */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pxor", Iop_XorV128 );
      goto decode_success;
   }
   /* 66 0F F1 = PSLLW by E */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllw", Iop_ShlN16x8 );
      goto decode_success;
   }

   /* 66 0F F2 = PSLLD by E */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "pslld", Iop_ShlN32x4 );
      goto decode_success;
   }

   /* 66 0F F3 = PSLLQ by E */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllq", Iop_ShlN64x2 );
      goto decode_success;
   }
   /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
      0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
      half */
   if (have66noF2noF3(pfx) && sz == 2) {
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);
      modrm = getUChar(delta);
      UInt rG = gregOfRexRM(pfx,modrm);
      assign( dV, getXMMReg(rG) );
      if (epartIsReg(modrm)) {
         UInt rE = eregOfRexRM(pfx,modrm);
         assign( sV, getXMMReg(rE) );
         DIP("pmuludq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("pmuludq %s,%s\n", dis_buf, nameXMMReg(rG));
      }
      putXMMReg( rG, mkexpr(math_PMULUDQ_128( sV, dV )) );
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
   /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
      0 to form 64-bit result */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      IRTemp sV = newTemp(Ity_I64);
      IRTemp dV = newTemp(Ity_I64);
      t1 = newTemp(Ity_I32);
      t0 = newTemp(Ity_I32);
      modrm = getUChar(delta);
      assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
         DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                nameMMXReg(gregLO3ofRM(modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         DIP("pmuludq %s,%s\n", dis_buf,
                                nameMMXReg(gregLO3ofRM(modrm)));
      }
      assign( t0, unop(Iop_64to32, mkexpr(dV)) );
      assign( t1, unop(Iop_64to32, mkexpr(sV)) );
      putMMXReg( gregLO3ofRM(modrm),
                 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
      goto decode_success;
   }
   /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
      E(xmm or mem) to G(xmm) */
   if (have66noF2noF3(pfx) && sz == 2) {
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);
      modrm = getUChar(delta);
      UInt rG = gregOfRexRM(pfx,modrm);
      if (epartIsReg(modrm)) {
         UInt rE = eregOfRexRM(pfx,modrm);
         assign( sV, getXMMReg(rE) );
         DIP("pmaddwd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("pmaddwd %s,%s\n", dis_buf, nameXMMReg(rG));
      }
      assign( dV, getXMMReg(rG) );
      putXMMReg( rG, mkexpr(math_PMADDWD_128(dV, sV)) );
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_MMXop_regmem_to_reg (
                 vbi, pfx, delta, opc, "psadbw", False );
      goto decode_success;
   }

   /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
      from E(xmm or mem) to G(xmm) */
   if (have66noF2noF3(pfx) && sz == 2) {
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);
      modrm = getUChar(delta);
      UInt rG = gregOfRexRM(pfx,modrm);
      if (epartIsReg(modrm)) {
         UInt rE = eregOfRexRM(pfx,modrm);
         assign( sV, getXMMReg(rE) );
         DIP("psadbw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("psadbw %s,%s\n", dis_buf, nameXMMReg(rG));
      }
      assign( dV, getXMMReg(rG) );
      putXMMReg( rG, mkexpr( math_PSADBW_128 ( dV, sV ) ) );
      goto decode_success;
   }
   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
   /* 0F F7 = MASKMOVQ -- 8x8 masked store */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      Bool ok = False;
      delta = dis_MMX( &ok, vbi, pfx, sz, delta-1 );
      if (ok) goto decode_success;
   }

   /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
   if (have66noF2noF3(pfx) && sz == 2 && epartIsReg(getUChar(delta))) {
      delta = dis_MASKMOVDQU( vbi, pfx, delta, False/*!isAvx*/ );
      goto decode_success;
   }
   /* 66 0F F8 = PSUBB */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "psubb", Iop_Sub8x16, False );
      goto decode_success;
   }

   /* 66 0F F9 = PSUBW */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "psubw", Iop_Sub16x8, False );
      goto decode_success;
   }

   /* 66 0F FA = PSUBD */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "psubd", Iop_Sub32x4, False );
      goto decode_success;
   }

   /* 66 0F FB = PSUBQ */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "psubq", Iop_Sub64x2, False );
      goto decode_success;
   }

   /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
   /* 0F FB = PSUBQ -- sub 64x1 */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      delta = dis_MMXop_regmem_to_reg (
                 vbi, pfx, delta, opc, "psubq", False );
      goto decode_success;
   }

   /* 66 0F FC = PADDB */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "paddb", Iop_Add8x16, False );
      goto decode_success;
   }

   /* 66 0F FD = PADDW */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "paddw", Iop_Add16x8, False );
      goto decode_success;
   }

   /* 66 0F FE = PADDD */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "paddd", Iop_Add32x4, False );
      goto decode_success;
   }

   goto decode_failure;

  decode_failure:
   *decode_OK = False;
   return deltaIN; /* fail */
}
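
/* Illustrative sketch only, not part of the original decoder and not
   referenced by it: a minimal scalar model of the PMOVMSKB (MMX form)
   semantics handled above, i.e. collecting the sign bit of each 8-bit
   lane of a 64-bit value into the low bits of an integer result.
   The helper name is hypothetical. */
__attribute__((unused))
static UInt ref_pmovmskb_8x8 ( ULong mmxVal )
{
   UInt res = 0;
   Int  i;
   for (i = 0; i < 8; i++) {
      UChar lane = (UChar)(mmxVal >> (8 * i));
      if (lane & 0x80)
         res |= (1U << i);   /* bit i <- top (sign) bit of lane i */
   }
   return res;               /* the upper 24 bits stay zero */
}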
/*------------------------------------------------------------*/
/*--- Top-level SSE3 (not SupSSE3): dis_ESC_0F__SSE3       ---*/
/*------------------------------------------------------------*/

static Long dis_MOVDDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   IRTemp sV    = newTemp(Ity_V128);
   IRTemp d0    = newTemp(Ity_I64);
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      DIP("%smovddup %s,%s\n",
          isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
      assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
      DIP("%smovddup %s,%s\n",
          isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
   }
   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
   return delta;
}
static Long dis_MOVDDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
                              Long delta )
{
   IRTemp addr  = IRTemp_INVALID;
   IRTemp d0    = newTemp(Ity_I64);
   IRTemp d1    = newTemp(Ity_I64);
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      DIP("vmovddup %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
      assign ( d0, getYMMRegLane64(rE, 0) );
      assign ( d1, getYMMRegLane64(rE, 2) );
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
      assign( d1, loadLE(Ity_I64, binop(Iop_Add64,
                                        mkexpr(addr), mkU64(16))) );
      DIP("vmovddup %s,%s\n", dis_buf, nameYMMReg(rG));
   }
   putYMMRegLane64( rG, 0, mkexpr(d0) );
   putYMMRegLane64( rG, 1, mkexpr(d0) );
   putYMMRegLane64( rG, 2, mkexpr(d1) );
   putYMMRegLane64( rG, 3, mkexpr(d1) );
   return delta;
}
static Long dis_MOVSxDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx, Bool isL )
{
   IRTemp addr  = IRTemp_INVALID;
   IRTemp sV    = newTemp(Ity_V128);
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      DIP("%smovs%cdup %s,%s\n",
          isAvx ? "v" : "", isL ? 'l' : 'h', nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      gen_SEGV_if_not_16_aligned( addr );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      DIP("%smovs%cdup %s,%s\n",
          isAvx ? "v" : "", isL ? 'l' : 'h', dis_buf, nameXMMReg(rG));
   }
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, isL ? mkV128from32s( s2, s2, s0, s0 )
                : mkV128from32s( s3, s3, s1, s1 ) );
   return delta;
}
static Long dis_MOVSxDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isL )
{
   IRTemp addr  = IRTemp_INVALID;
   IRTemp sV    = newTemp(Ity_V256);
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
   s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getYMMReg(rE) );
      DIP("vmovs%cdup %s,%s\n",
          isL ? 'l' : 'h', nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
      DIP("vmovs%cdup %s,%s\n",
          isL ? 'l' : 'h', dis_buf, nameYMMReg(rG));
   }
   breakupV256to32s( sV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
   putYMMRegLane128( rG, 1, isL ? mkV128from32s( s6, s6, s4, s4 )
                                : mkV128from32s( s7, s7, s5, s5 ) );
   putYMMRegLane128( rG, 0, isL ? mkV128from32s( s2, s2, s0, s0 )
                                : mkV128from32s( s3, s3, s1, s1 ) );
   return delta;
}
static IRTemp math_HADDPS_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
{
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   IRTemp leftV  = newTemp(Ity_V128);
   IRTemp rightV = newTemp(Ity_V128);
   IRTemp rm     = newTemp(Ity_I32);
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;

   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   breakupV128to32s( dV, &d3, &d2, &d1, &d0 );

   assign( leftV,  mkV128from32s( s2, s0, d2, d0 ) );
   assign( rightV, mkV128from32s( s3, s1, d3, d1 ) );

   IRTemp res = newTemp(Ity_V128);
   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( res, triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
                      mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
   return res;
}
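
/* Illustrative sketch only, not used by the translator: a scalar model
   of the horizontal add/sub pairing that math_HADDPS_128 above builds
   with leftV/rightV.  With dst lanes d[0..3] and src lanes s[0..3],
   the result is (d0 op d1, d2 op d3, s0 op s1, s2 op s3).  The helper
   name is hypothetical. */
__attribute__((unused))
static void ref_haddps ( float* res4, const float* d4, const float* s4,
                         Bool isAdd )
{
   res4[0] = isAdd ? (d4[0] + d4[1]) : (d4[0] - d4[1]);
   res4[1] = isAdd ? (d4[2] + d4[3]) : (d4[2] - d4[3]);
   res4[2] = isAdd ? (s4[0] + s4[1]) : (s4[0] - s4[1]);
   res4[3] = isAdd ? (s4[2] + s4[3]) : (s4[2] - s4[3]);
}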
static IRTemp math_HADDPD_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
{
   IRTemp s1, s0, d1, d0;
   IRTemp leftV  = newTemp(Ity_V128);
   IRTemp rightV = newTemp(Ity_V128);
   IRTemp rm     = newTemp(Ity_I32);
   s1 = s0 = d1 = d0 = IRTemp_INVALID;

   breakupV128to64s( sV, &s1, &s0 );
   breakupV128to64s( dV, &d1, &d0 );

   assign( leftV,  binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
   assign( rightV, binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );

   IRTemp res = newTemp(Ity_V128);
   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( res, triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
                      mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
   return res;
}
__attribute__((noinline))
Long dis_ESC_0F__SSE3 ( Bool* decode_OK,
                        const VexAbiInfo* vbi,
                        Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr = IRTemp_INVALID;

   *decode_OK = False;

   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);

   /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
      duplicating some lanes (2:2:0:0). */
   if (haveF3no66noF2(pfx) && sz == 4) {
      delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
                                True/*isL*/ );
      goto decode_success;
   }

   /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
      duplicating some lanes (0:1:0:1). */
   if (haveF2no66noF3(pfx)
       && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
      delta = dis_MOVDDUP_128( vbi, pfx, delta, False/*!isAvx*/ );
      goto decode_success;
   }

   /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
      duplicating some lanes (3:3:1:1). */
   if (haveF3no66noF2(pfx) && sz == 4) {
      delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
                                False/*!isL*/ );
      goto decode_success;
   }
   /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
   /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
   if (haveF2no66noF3(pfx) && sz == 4) {
      IRTemp eV    = newTemp(Ity_V128);
      IRTemp gV    = newTemp(Ity_V128);
      Bool   isAdd = opc == 0x7C;
      const HChar* str = isAdd ? "add" : "sub";
      modrm = getUChar(delta);
      UInt   rG    = gregOfRexRM(pfx,modrm);
      if (epartIsReg(modrm)) {
         UInt rE = eregOfRexRM(pfx,modrm);
         assign( eV, getXMMReg(rE) );
         DIP("h%sps %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("h%sps %s,%s\n", str, dis_buf, nameXMMReg(rG));
      }

      assign( gV, getXMMReg(rG) );
      putXMMReg( rG, mkexpr( math_HADDPS_128 ( gV, eV, isAdd ) ) );
      goto decode_success;
   }
   /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
   /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
   if (have66noF2noF3(pfx) && sz == 2) {
      IRTemp eV    = newTemp(Ity_V128);
      IRTemp gV    = newTemp(Ity_V128);
      Bool   isAdd = opc == 0x7C;
      const HChar* str = isAdd ? "add" : "sub";
      modrm = getUChar(delta);
      UInt   rG    = gregOfRexRM(pfx,modrm);
      if (epartIsReg(modrm)) {
         UInt rE = eregOfRexRM(pfx,modrm);
         assign( eV, getXMMReg(rE) );
         DIP("h%spd %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("h%spd %s,%s\n", str, dis_buf, nameXMMReg(rG));
      }

      assign( gV, getXMMReg(rG) );
      putXMMReg( rG, mkexpr( math_HADDPD_128 ( gV, eV, isAdd ) ) );
      goto decode_success;
   }
   /* 66 0F D0 = ADDSUBPD -- 64x4 +/- from E (mem or xmm) to G (xmm). */
   if (have66noF2noF3(pfx) && sz == 2) {
      IRTemp eV = newTemp(Ity_V128);
      IRTemp gV = newTemp(Ity_V128);
      modrm = getUChar(delta);
      UInt rG = gregOfRexRM(pfx,modrm);
      if (epartIsReg(modrm)) {
         UInt rE = eregOfRexRM(pfx,modrm);
         assign( eV, getXMMReg(rE) );
         DIP("addsubpd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("addsubpd %s,%s\n", dis_buf, nameXMMReg(rG));
      }

      assign( gV, getXMMReg(rG) );
      putXMMReg( rG, mkexpr( math_ADDSUBPD_128 ( gV, eV ) ) );
      goto decode_success;
   }
   /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
   if (haveF2no66noF3(pfx) && sz == 4) {
      IRTemp eV = newTemp(Ity_V128);
      IRTemp gV = newTemp(Ity_V128);
      modrm = getUChar(delta);
      UInt rG = gregOfRexRM(pfx,modrm);

      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         UInt rE = eregOfRexRM(pfx,modrm);
         assign( eV, getXMMReg(rE) );
         DIP("addsubps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("addsubps %s,%s\n", dis_buf, nameXMMReg(rG));
      }

      assign( gV, getXMMReg(rG) );
      putXMMReg( rG, mkexpr( math_ADDSUBPS_128 ( gV, eV ) ) );
      goto decode_success;
   }
   /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
   if (haveF2no66noF3(pfx) && sz == 4) {
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         goto decode_failure;
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         putXMMReg( gregOfRexRM(pfx,modrm),
                    loadLE(Ity_V128, mkexpr(addr)) );
         DIP("lddqu %s,%s\n", dis_buf,
                              nameXMMReg(gregOfRexRM(pfx,modrm)));
      }
      goto decode_success;
   }

   goto decode_failure;

  decode_failure:
   *decode_OK = False;
   return deltaIN; /* fail */
}


/*------------------------------------------------------------*/
/*--- Top-level SSSE3: dis_ESC_0F38__SupSSE3               ---*/
/*------------------------------------------------------------*/
IRTemp math_PSHUFB_XMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
{
   IRTemp sHi        = newTemp(Ity_I64);
   IRTemp sLo        = newTemp(Ity_I64);
   IRTemp dHi        = newTemp(Ity_I64);
   IRTemp dLo        = newTemp(Ity_I64);
   IRTemp rHi        = newTemp(Ity_I64);
   IRTemp rLo        = newTemp(Ity_I64);
   IRTemp sevens     = newTemp(Ity_I64);
   IRTemp mask0x80hi = newTemp(Ity_I64);
   IRTemp mask0x80lo = newTemp(Ity_I64);
   IRTemp maskBit3hi = newTemp(Ity_I64);
   IRTemp maskBit3lo = newTemp(Ity_I64);
   IRTemp sAnd7hi    = newTemp(Ity_I64);
   IRTemp sAnd7lo    = newTemp(Ity_I64);
   IRTemp permdHi    = newTemp(Ity_I64);
   IRTemp permdLo    = newTemp(Ity_I64);
   IRTemp res        = newTemp(Ity_V128);

   assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

   assign( sevens, mkU64(0x0707070707070707ULL) );

   /* mask0x80hi = Not(SarN8x8(sHi,7))
      maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
      sAnd7hi    = And(sHi,sevens)
      permdHi    = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
                       And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
      rHi        = And(permdHi,mask0x80hi)
   */
   assign(mask0x80hi,
          unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));

   assign(maskBit3hi,
          binop(Iop_SarN8x8,
                binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
                mkU8(7)));

   assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));

   assign(permdHi,
          binop(Iop_Or64,
                binop(Iop_And64,
                      binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
                      mkexpr(maskBit3hi)),
                binop(Iop_And64,
                      binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
                      unop(Iop_Not64,mkexpr(maskBit3hi))) ));

   assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );

   /* And the same for the lower half of the result.  What fun. */

   assign(mask0x80lo,
          unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));

   assign(maskBit3lo,
          binop(Iop_SarN8x8,
                binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
                mkU8(7)));

   assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));

   assign(permdLo,
          binop(Iop_Or64,
                binop(Iop_And64,
                      binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
                      mkexpr(maskBit3lo)),
                binop(Iop_And64,
                      binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
                      unop(Iop_Not64,mkexpr(maskBit3lo))) ));

   assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );

   assign(res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
   return res;
}
IRTemp math_PSHUFB_YMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
{
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo );
   breakupV256toV128s( sV, &sHi, &sLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PSHUFB_XMM(dHi, sHi)),
                     mkexpr(math_PSHUFB_XMM(dLo, sLo))));
   return res;
}
static Long dis_PHADD_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
                            Bool isAvx, UChar opc )
{
   IRTemp addr   = IRTemp_INVALID;
   const HChar* str = "???";
   IROp   opV64  = Iop_INVALID;
   IROp   opCatO = Iop_CatOddLanes16x4;
   IROp   opCatE = Iop_CatEvenLanes16x4;
   IRTemp sV     = newTemp(Ity_V128);
   IRTemp dV     = newTemp(Ity_V128);
   IRTemp sHi    = newTemp(Ity_I64);
   IRTemp sLo    = newTemp(Ity_I64);
   IRTemp dHi    = newTemp(Ity_I64);
   IRTemp dLo    = newTemp(Ity_I64);
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx,modrm);
   UInt   rV     = isAvx ? getVexNvvvv(pfx) : rG;

   switch (opc) {
      case 0x01: opV64 = Iop_Add16x4;   str = "addw";  break;
      case 0x02: opV64 = Iop_Add32x2;   str = "addd";  break;
      case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
      case 0x05: opV64 = Iop_Sub16x4;   str = "subw";  break;
      case 0x06: opV64 = Iop_Sub32x2;   str = "subd";  break;
      case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
      default: vassert(0);
   }
   if (opc == 0x02 || opc == 0x06) {
      opCatO = Iop_InterleaveHI32x2;
      opCatE = Iop_InterleaveLO32x2;
   }

   assign( dV, getXMMReg(rV) );

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
          nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      gen_SEGV_if_not_16_aligned( addr );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
          dis_buf, nameXMMReg(rG));
   }

   assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
   assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
   assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

   /* This isn't a particularly efficient way to compute the
      result, but at least it avoids a proliferation of IROps,
      hence avoids complication all the backends. */
   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG,
        binop(Iop_64HLtoV128,
              binop(opV64,
                    binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
                    binop(opCatO,mkexpr(sHi),mkexpr(sLo)) ),
              binop(opV64,
                    binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
                    binop(opCatO,mkexpr(dHi),mkexpr(dLo)) ) ) );
   return delta;
}
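
/* Illustrative sketch only, not used by the translator: a scalar model
   of the 128-bit PHADDW pairing produced by dis_PHADD_128 above.  The
   low four result lanes come from pairwise combining the destination
   operand, the high four from the E operand, mirroring the
   CatEvenLanes/CatOddLanes arrangement.  The helper name is
   hypothetical. */
__attribute__((unused))
static void ref_phaddw_128 ( Short* res8, const Short* d8, const Short* s8 )
{
   Int i;
   for (i = 0; i < 4; i++) {
      res8[i]     = (Short)(d8[2*i] + d8[2*i + 1]);
      res8[i + 4] = (Short)(s8[2*i] + s8[2*i + 1]);
   }
}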
static Long dis_PHADD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
                            UChar opc )
{
   IRTemp addr   = IRTemp_INVALID;
   const HChar* str = "???";
   IROp   opV64  = Iop_INVALID;
   IROp   opCatO = Iop_CatOddLanes16x4;
   IROp   opCatE = Iop_CatEvenLanes16x4;
   IRTemp sV     = newTemp(Ity_V256);
   IRTemp dV     = newTemp(Ity_V256);
   IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
   s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx,modrm);
   UInt   rV     = getVexNvvvv(pfx);

   switch (opc) {
      case 0x01: opV64 = Iop_Add16x4;   str = "addw";  break;
      case 0x02: opV64 = Iop_Add32x2;   str = "addd";  break;
      case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
      case 0x05: opV64 = Iop_Sub16x4;   str = "subw";  break;
      case 0x06: opV64 = Iop_Sub32x2;   str = "subd";  break;
      case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
      default: vassert(0);
   }
   if (opc == 0x02 || opc == 0x06) {
      opCatO = Iop_InterleaveHI32x2;
      opCatE = Iop_InterleaveLO32x2;
   }

   assign( dV, getYMMReg(rV) );

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getYMMReg(rE) );
      DIP("vph%s %s,%s\n", str, nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
      DIP("vph%s %s,%s\n", str, dis_buf, nameYMMReg(rG));
   }

   breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
   breakupV256to64s( sV, &s3, &s2, &s1, &s0 );

   /* This isn't a particularly efficient way to compute the
      result, but at least it avoids a proliferation of IROps,
      hence avoids complication all the backends. */
   putYMMReg( rG,
              binop(Iop_V128HLtoV256,
                    binop(Iop_64HLtoV128,
                          binop(opV64,
                                binop(opCatE,mkexpr(s3),mkexpr(s2)),
                                binop(opCatO,mkexpr(s3),mkexpr(s2)) ),
                          binop(opV64,
                                binop(opCatE,mkexpr(d3),mkexpr(d2)),
                                binop(opCatO,mkexpr(d3),mkexpr(d2)) ) ),
                    binop(Iop_64HLtoV128,
                          binop(opV64,
                                binop(opCatE,mkexpr(s1),mkexpr(s0)),
                                binop(opCatO,mkexpr(s1),mkexpr(s0)) ),
                          binop(opV64,
                                binop(opCatE,mkexpr(d1),mkexpr(d0)),
                                binop(opCatO,mkexpr(d1),mkexpr(d0)) ) ) ) );
   return delta;
}
static IRTemp math_PMADDUBSW_128 ( IRTemp dV, IRTemp sV )
{
   IRTemp sVoddsSX  = newTemp(Ity_V128);
   IRTemp sVevensSX = newTemp(Ity_V128);
   IRTemp dVoddsZX  = newTemp(Ity_V128);
   IRTemp dVevensZX = newTemp(Ity_V128);
   /* compute dV unsigned x sV signed */
   assign( sVoddsSX, binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
   assign( sVevensSX, binop(Iop_SarN16x8,
                            binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
                            mkU8(8)) );
   assign( dVoddsZX, binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
   assign( dVevensZX, binop(Iop_ShrN16x8,
                            binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
                            mkU8(8)) );

   IRTemp res = newTemp(Ity_V128);
   assign( res, binop(Iop_QAdd16Sx8,
                      binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
                      binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
                     ) );
   return res;
}
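
/* Illustrative sketch only, not used above: a scalar model of one
   16-bit output lane of PMADDUBSW as built by math_PMADDUBSW_128.
   Each result lane is the signed-saturated sum of two products,
   unsigned byte from the destination times signed byte from the
   source.  The helper name is hypothetical. */
__attribute__((unused))
static Int ref_pmaddubsw_lane ( UChar dEven, UChar dOdd,
                                signed char sEven, signed char sOdd )
{
   Int sum = (Int)dEven * sEven + (Int)dOdd * sOdd;
   if (sum >  32767) sum =  32767;   /* signed 16-bit saturation */
   if (sum < -32768) sum = -32768;
   return sum;
}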
IRTemp math_PMADDUBSW_256 ( IRTemp dV, IRTemp sV )
{
   IRTemp sHi, sLo, dHi, dLo;
   sHi = sLo = dHi = dLo = IRTemp_INVALID;
   breakupV256toV128s( dV, &dHi, &dLo );
   breakupV256toV128s( sV, &sHi, &sLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256,
                     mkexpr(math_PMADDUBSW_128(dHi, sHi)),
                     mkexpr(math_PMADDUBSW_128(dLo, sLo))));
   return res;
}
__attribute__((noinline))
Long dis_ESC_0F38__SupSSE3 ( Bool* decode_OK,
                             const VexAbiInfo* vbi,
                             Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr = IRTemp_INVALID;

   *decode_OK = False;

   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);

   /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);

      modrm = getUChar(delta);
      assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
         DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                               nameXMMReg(gregOfRexRM(pfx,modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("pshufb %s,%s\n", dis_buf,
                               nameXMMReg(gregOfRexRM(pfx,modrm)));
      }

      IRTemp res = math_PSHUFB_XMM( dV, sV );
      putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(res));
      goto decode_success;
   }
   /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      IRTemp sV = newTemp(Ity_I64);
      IRTemp dV = newTemp(Ity_I64);

      modrm = getUChar(delta);
      assign( dV, getMMXReg(gregLO3ofRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
         DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                               nameMMXReg(gregLO3ofRM(modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         DIP("pshufb %s,%s\n", dis_buf,
                               nameMMXReg(gregLO3ofRM(modrm)));
      }

      putMMXReg(
         gregLO3ofRM(modrm),
         binop(
            Iop_And64,
            /* permute the lanes */
            binop(
               Iop_Perm8x8,
               mkexpr(dV),
               binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
            ),
            /* mask off lanes which have (index & 0x80) == 0x80 */
            unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
         )
      );
      goto decode_success;
   }
   /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
      G to G (xmm). */
   /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
      G to G (xmm). */
   /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
      xmm) and G to G (xmm). */
   /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
      G to G (xmm). */
   /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
      G to G (xmm). */
   /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
      xmm) and G to G (xmm). */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
      delta = dis_PHADD_128( vbi, pfx, delta, False/*isAvx*/, opc );
      goto decode_success;
   }
   /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
   /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
      to G (mmx). */
   /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
      to G (mmx). */
   /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
      mmx) and G to G (mmx). */
   /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
      to G (mmx). */
   /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
      to G (mmx). */
   /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
      mmx) and G to G (mmx). */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      const HChar* str = "???";
      IROp   opV64  = Iop_INVALID;
      IROp   opCatO = Iop_CatOddLanes16x4;
      IROp   opCatE = Iop_CatEvenLanes16x4;
      IRTemp sV     = newTemp(Ity_I64);
      IRTemp dV     = newTemp(Ity_I64);

      modrm = getUChar(delta);

      switch (opc) {
         case 0x01: opV64 = Iop_Add16x4;   str = "addw";  break;
         case 0x02: opV64 = Iop_Add32x2;   str = "addd";  break;
         case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
         case 0x05: opV64 = Iop_Sub16x4;   str = "subw";  break;
         case 0x06: opV64 = Iop_Sub32x2;   str = "subd";  break;
         case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
         default: vassert(0);
      }
      if (opc == 0x02 || opc == 0x06) {
         opCatO = Iop_InterleaveHI32x2;
         opCatE = Iop_InterleaveLO32x2;
      }

      assign( dV, getMMXReg(gregLO3ofRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
         DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
                                  nameMMXReg(gregLO3ofRM(modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         DIP("ph%s %s,%s\n", str, dis_buf,
                                  nameMMXReg(gregLO3ofRM(modrm)));
      }

      putMMXReg(
         gregLO3ofRM(modrm),
         binop(opV64,
               binop(opCatE,mkexpr(sV),mkexpr(dV)),
               binop(opCatO,mkexpr(sV),mkexpr(dV))
         )
      );
      goto decode_success;
   }
   /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
      Unsigned Bytes (XMM) */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);
      modrm     = getUChar(delta);
      UInt   rG = gregOfRexRM(pfx,modrm);

      assign( dV, getXMMReg(rG) );

      if (epartIsReg(modrm)) {
         UInt rE = eregOfRexRM(pfx,modrm);
         assign( sV, getXMMReg(rE) );
         DIP("pmaddubsw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("pmaddubsw %s,%s\n", dis_buf, nameXMMReg(rG));
      }

      putXMMReg( rG, mkexpr( math_PMADDUBSW_128( dV, sV ) ) );
      goto decode_success;
   }
   /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
      Unsigned Bytes (MMX) */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      IRTemp sV        = newTemp(Ity_I64);
      IRTemp dV        = newTemp(Ity_I64);
      IRTemp sVoddsSX  = newTemp(Ity_I64);
      IRTemp sVevensSX = newTemp(Ity_I64);
      IRTemp dVoddsZX  = newTemp(Ity_I64);
      IRTemp dVevensZX = newTemp(Ity_I64);

      modrm = getUChar(delta);
      assign( dV, getMMXReg(gregLO3ofRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
         DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                  nameMMXReg(gregLO3ofRM(modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         DIP("pmaddubsw %s,%s\n", dis_buf,
                                  nameMMXReg(gregLO3ofRM(modrm)));
      }

      /* compute dV unsigned x sV signed */
      assign( sVoddsSX,
              binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
      assign( sVevensSX,
              binop(Iop_SarN16x4,
                    binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
                    mkU8(8)) );
      assign( dVoddsZX,
              binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
      assign( dVevensZX,
              binop(Iop_ShrN16x4,
                    binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
                    mkU8(8)) );

      putMMXReg(
         gregLO3ofRM(modrm),
         binop(Iop_QAdd16Sx4,
               binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
               binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
         )
      );
      goto decode_success;
   }
   /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
   /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
   /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
      IRTemp sV  = newTemp(Ity_V128);
      IRTemp dV  = newTemp(Ity_V128);
      IRTemp sHi = newTemp(Ity_I64);
      IRTemp sLo = newTemp(Ity_I64);
      IRTemp dHi = newTemp(Ity_I64);
      IRTemp dLo = newTemp(Ity_I64);
      const HChar* str = "???";
      Int    laneszB = 0;

      switch (opc) {
         case 0x08: laneszB = 1; str = "b"; break;
         case 0x09: laneszB = 2; str = "w"; break;
         case 0x0A: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = getUChar(delta);
      assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
         DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
                                     nameXMMReg(gregOfRexRM(pfx,modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("psign%s %s,%s\n", str, dis_buf,
                                     nameXMMReg(gregOfRexRM(pfx,modrm)));
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      putXMMReg(
         gregOfRexRM(pfx,modrm),
         binop(Iop_64HLtoV128,
               dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
               dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
         )
      );
      goto decode_success;
   }
   /* 0F 38 08 = PSIGNB -- Packed Sign 8x8  (MMX) */
   /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
   /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      IRTemp sV = newTemp(Ity_I64);
      IRTemp dV = newTemp(Ity_I64);
      const HChar* str = "???";
      Int    laneszB = 0;

      switch (opc) {
         case 0x08: laneszB = 1; str = "b"; break;
         case 0x09: laneszB = 2; str = "w"; break;
         case 0x0A: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = getUChar(delta);
      assign( dV, getMMXReg(gregLO3ofRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
         DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
                                     nameMMXReg(gregLO3ofRM(modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         DIP("psign%s %s,%s\n", str, dis_buf,
                                     nameMMXReg(gregLO3ofRM(modrm)));
      }

      putMMXReg(
         gregLO3ofRM(modrm),
         dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
      );
      goto decode_success;
   }
   /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
      Scale (XMM) */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
      IRTemp sV  = newTemp(Ity_V128);
      IRTemp dV  = newTemp(Ity_V128);
      IRTemp sHi = newTemp(Ity_I64);
      IRTemp sLo = newTemp(Ity_I64);
      IRTemp dHi = newTemp(Ity_I64);
      IRTemp dLo = newTemp(Ity_I64);

      modrm = getUChar(delta);
      assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
         DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("pmulhrsw %s,%s\n", dis_buf,
                                 nameXMMReg(gregOfRexRM(pfx,modrm)));
      }

      assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
      assign( dLo, unop(Iop_V128to64,   mkexpr(dV)) );
      assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
      assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );

      putXMMReg(
         gregOfRexRM(pfx,modrm),
         binop(Iop_64HLtoV128,
               dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
               dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
         )
      );
      goto decode_success;
   }
   /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
      (MMX) */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      IRTemp sV = newTemp(Ity_I64);
      IRTemp dV = newTemp(Ity_I64);

      modrm = getUChar(delta);
      assign( dV, getMMXReg(gregLO3ofRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
         DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
                                 nameMMXReg(gregLO3ofRM(modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         DIP("pmulhrsw %s,%s\n", dis_buf,
                                 nameMMXReg(gregLO3ofRM(modrm)));
      }

      putMMXReg(
         gregLO3ofRM(modrm),
         dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
      );
      goto decode_success;
   }
   /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
   /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
   /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
      IRTemp sV = newTemp(Ity_V128);
      const HChar* str = "???";
      Int    laneszB = 0;

      switch (opc) {
         case 0x1C: laneszB = 1; str = "b"; break;
         case 0x1D: laneszB = 2; str = "w"; break;
         case 0x1E: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
         DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("pabs%s %s,%s\n", str, dis_buf,
                                    nameXMMReg(gregOfRexRM(pfx,modrm)));
      }

      putXMMReg( gregOfRexRM(pfx,modrm),
                 mkexpr(math_PABS_XMM(sV, laneszB)) );
      goto decode_success;
   }
   /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8  (MMX) */
   /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
   /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      IRTemp sV = newTemp(Ity_I64);
      const HChar* str = "???";
      Int    laneszB = 0;

      switch (opc) {
         case 0x1C: laneszB = 1; str = "b"; break;
         case 0x1D: laneszB = 2; str = "w"; break;
         case 0x1E: laneszB = 4; str = "d"; break;
         default: vassert(0);
      }

      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
         DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
                                    nameMMXReg(gregLO3ofRM(modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         DIP("pabs%s %s,%s\n", str, dis_buf,
                                    nameMMXReg(gregLO3ofRM(modrm)));
      }

      putMMXReg( gregLO3ofRM(modrm),
                 mkexpr(math_PABS_MMX( sV, laneszB )) );
      goto decode_success;
   }

  decode_failure:
   *decode_OK = False;
   return deltaIN; /* fail */
}
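
/* Illustrative sketch only, not referenced by the decoder: a scalar
   model of one 16-bit lane of PMULHRSW as handled by
   dis_PMULHRSW_helper above, i.e. the rounded, scaled high half of a
   signed 16x16 product: ((a*b >> 14) + 1) >> 1.  The helper name is
   hypothetical. */
__attribute__((unused))
static Short ref_pmulhrsw_lane ( Short a, Short b )
{
   Int prod = (Int)a * (Int)b;
   return (Short)(((prod >> 14) + 1) >> 1);
}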
/*------------------------------------------------------------*/
/*--- Top-level SSSE3: dis_ESC_0F3A__SupSSE3               ---*/
/*------------------------------------------------------------*/

__attribute__((noinline))
Long dis_ESC_0F3A__SupSSE3 ( Bool* decode_OK,
                             const VexAbiInfo* vbi,
                             Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr = IRTemp_INVALID;

   *decode_OK = False;

   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);

   /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);

      modrm = getUChar(delta);
      assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
         d64 = (Long)getUChar(delta+1);
         DIP("palignr $%lld,%s,%s\n", d64,
             nameXMMReg(eregOfRexRM(pfx,modrm)),
             nameXMMReg(gregOfRexRM(pfx,modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
         gen_SEGV_if_not_16_aligned( addr );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         d64 = (Long)getUChar(delta+alen);
         DIP("palignr $%lld,%s,%s\n", d64,
             dis_buf,
             nameXMMReg(gregOfRexRM(pfx,modrm)));
      }

      IRTemp res = math_PALIGNR_XMM( sV, dV, d64 );
      putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
      goto decode_success;
   }

   /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
   if (haveNo66noF2noF3(pfx) && sz == 4) {
      IRTemp sV  = newTemp(Ity_I64);
      IRTemp dV  = newTemp(Ity_I64);
      IRTemp res = newTemp(Ity_I64);

      modrm = getUChar(delta);
      assign( dV, getMMXReg(gregLO3ofRM(modrm)) );

      if (epartIsReg(modrm)) {
         assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
         d64 = (Long)getUChar(delta+1);
         DIP("palignr $%lld,%s,%s\n", d64,
             nameMMXReg(eregLO3ofRM(modrm)),
             nameMMXReg(gregLO3ofRM(modrm)));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
         assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
         d64 = (Long)getUChar(delta+alen);
         DIP("palignr $%lld%s,%s\n", d64,
             dis_buf,
             nameMMXReg(gregLO3ofRM(modrm)));
      }

      if (d64 == 0) {
         assign( res, mkexpr(sV) );
      }
      else if (d64 >= 1 && d64 <= 7) {
         assign(res,
                binop(Iop_Or64,
                      binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)),
                      binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64))
                )));
      }
      else if (d64 == 8) {
         assign( res, mkexpr(dV) );
      }
      else if (d64 >= 9 && d64 <= 15) {
         assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) );
      }
      else if (d64 >= 16 && d64 <= 255) {
         assign( res, mkU64(0) );
      }

      putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
      goto decode_success;
   }

   goto decode_failure;

  decode_failure:
   *decode_OK = False;
   return deltaIN; /* fail */
}
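
/* Illustrative sketch only, not used by the decoder above: a scalar
   model of the 64-bit (MMX) PALIGNR case analysis in
   dis_ESC_0F3A__SupSSE3.  The operands are treated as the 16-byte
   value dV:sV, shifted right by the immediate in whole bytes; shifts
   of 16 or more produce zero.  The helper name is hypothetical. */
__attribute__((unused))
static ULong ref_palignr_64 ( ULong dV, ULong sV, UInt imm8 )
{
   if (imm8 == 0)  return sV;
   if (imm8 < 8)   return (sV >> (8*imm8)) | (dV << (8*(8-imm8)));
   if (imm8 == 8)  return dV;
   if (imm8 < 16)  return dV >> (8*(imm8-8));
   return 0;
}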
/*------------------------------------------------------------*/
/*--- Top-level SSE4: dis_ESC_0F__SSE4                     ---*/
/*------------------------------------------------------------*/

__attribute__((noinline))
Long dis_ESC_0F__SSE4 ( Bool* decode_OK,
                        const VexArchInfo* archinfo,
                        const VexAbiInfo* vbi,
                        Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr = IRTemp_INVALID;
   IRType ty   = Ity_INVALID;

   *decode_OK = False;

   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);

   /* F3 0F B8  = POPCNT{W,L,Q}
      Count the number of 1 bits in a register
   */
   if (haveF3noF2(pfx) /* so both 66 and REX.W are possibilities */
       && (sz == 2 || sz == 4 || sz == 8)) {
      /*IRType*/ ty  = szToITy(sz);
      IRTemp     src = newTemp(ty);
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         assign(src, getIRegE(sz, pfx, modrm));
         DIP("popcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
             nameIRegG(sz, pfx, modrm));
      } else {
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
         assign(src, loadLE(ty, mkexpr(addr)));
         DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf,
             nameIRegG(sz, pfx, modrm));
      }

      IRTemp result = gen_POPCOUNT(ty, src);
      putIRegG(sz, pfx, modrm, mkexpr(result));

      // Update flags.  This is pretty lame .. perhaps can do better
      // if this turns out to be performance critical.
      // O S A C P are cleared.  Z is set if SRC == 0.
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1,
            binop(Iop_Shl64,
                  unop(Iop_1Uto64,
                       binop(Iop_CmpEQ64,
                             widenUto64(mkexpr(src)),
                             mkU64(0))),
                  mkU8(AMD64G_CC_SHIFT_Z))));

      goto decode_success;
   }
   /* F3 0F BC -- TZCNT (count trailing zeroes.  A BMI extension,
      which we can only decode if we're sure this is a BMI1 capable cpu
      that supports TZCNT, since otherwise it's BSF, which behaves
      differently on zero source.  */
   if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
       && (sz == 2 || sz == 4 || sz == 8)
       && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI)) {
      /*IRType*/ ty  = szToITy(sz);
      IRTemp     src = newTemp(ty);
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         assign(src, getIRegE(sz, pfx, modrm));
         DIP("tzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
             nameIRegG(sz, pfx, modrm));
      } else {
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
         assign(src, loadLE(ty, mkexpr(addr)));
         DIP("tzcnt%c %s, %s\n", nameISize(sz), dis_buf,
             nameIRegG(sz, pfx, modrm));
      }

      IRTemp res = gen_TZCNT(ty, src);
      putIRegG(sz, pfx, modrm, mkexpr(res));

      // Update flags.  This is pretty lame .. perhaps can do better
      // if this turns out to be performance critical.
      // O S A P are cleared.  Z is set if RESULT == 0.
      // C is set if SRC is zero.
      IRTemp src64 = newTemp(Ity_I64);
      IRTemp res64 = newTemp(Ity_I64);
      assign(src64, widenUto64(mkexpr(src)));
      assign(res64, widenUto64(mkexpr(res)));

      IRTemp oszacp = newTemp(Ity_I64);
      assign(
         oszacp,
         binop(Iop_Or64,
               binop(Iop_Shl64,
                     unop(Iop_1Uto64,
                          binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
                     mkU8(AMD64G_CC_SHIFT_Z)),
               binop(Iop_Shl64,
                     unop(Iop_1Uto64,
                          binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
                     mkU8(AMD64G_CC_SHIFT_C))
         )
      );

      stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));

      goto decode_success;
   }
   /* F3 0F BD -- LZCNT (count leading zeroes.  An AMD extension,
      which we can only decode if we're sure this is an AMD cpu
      that supports LZCNT, since otherwise it's BSR, which behaves
      differently.  Bizarrely, my Sandy Bridge also accepts these
      instructions but produces different results. */
   if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
       && (sz == 2 || sz == 4 || sz == 8)
       && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) {
      /*IRType*/ ty  = szToITy(sz);
      IRTemp     src = newTemp(ty);
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         assign(src, getIRegE(sz, pfx, modrm));
         DIP("lzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
             nameIRegG(sz, pfx, modrm));
      } else {
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
         assign(src, loadLE(ty, mkexpr(addr)));
         DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
             nameIRegG(sz, pfx, modrm));
      }

      IRTemp res = gen_LZCNT(ty, src);
      putIRegG(sz, pfx, modrm, mkexpr(res));

      // Update flags.  This is pretty lame .. perhaps can do better
      // if this turns out to be performance critical.
      // O S A P are cleared.  Z is set if RESULT == 0.
      // C is set if SRC is zero.
      IRTemp src64 = newTemp(Ity_I64);
      IRTemp res64 = newTemp(Ity_I64);
      assign(src64, widenUto64(mkexpr(src)));
      assign(res64, widenUto64(mkexpr(res)));

      IRTemp oszacp = newTemp(Ity_I64);
      assign(
         oszacp,
         binop(Iop_Or64,
               binop(Iop_Shl64,
                     unop(Iop_1Uto64,
                          binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
                     mkU8(AMD64G_CC_SHIFT_Z)),
               binop(Iop_Shl64,
                     unop(Iop_1Uto64,
                          binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
                     mkU8(AMD64G_CC_SHIFT_C))));

      stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));

      goto decode_success;
   }
   *decode_OK = False;
   return deltaIN;

  decode_success:
   *decode_OK = True;
   return delta;
}
/*------------------------------------------------------------*/
/*--- Top-level SSE4: dis_ESC_0F38__SSE4                   ---*/
/*------------------------------------------------------------*/
static IRTemp math_PBLENDVB_128 ( IRTemp vecE, IRTemp vecG,
                                  IRTemp vec0/*controlling mask*/,
                                  UInt gran, IROp opSAR )
{
   /* The tricky bit is to convert vec0 into a suitable mask, by
      copying the most significant bit of each lane into all positions
      in the lane. */
   IRTemp sh = newTemp(Ity_I8);
   assign(sh, mkU8(8 * gran - 1));

   IRTemp mask = newTemp(Ity_V128);
   assign(mask, binop(opSAR, mkexpr(vec0), mkexpr(sh)));

   IRTemp notmask = newTemp(Ity_V128);
   assign(notmask, unop(Iop_NotV128, mkexpr(mask)));

   IRTemp res = newTemp(Ity_V128);
   assign(res,  binop(Iop_OrV128,
                      binop(Iop_AndV128, mkexpr(vecE), mkexpr(mask)),
                      binop(Iop_AndV128, mkexpr(vecG), mkexpr(notmask))));
   return res;
}
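/* Worked example (added for clarity, assuming gran == 4 and
   opSAR == Iop_SarN32x4, as used for BLENDVPS): sh is 8*4 - 1 == 31, so
   each 32-bit lane of vec0 is shifted arithmetically right by 31 bits.
   A lane whose most significant bit was 1 becomes 0xFFFFFFFF, one whose
   MSB was 0 becomes 0x00000000, giving the per-lane all-ones/all-zeroes
   masks that then select between vecE and vecG. */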
static IRTemp math_PBLENDVB_256 ( IRTemp vecE, IRTemp vecG,
                                  IRTemp vec0/*controlling mask*/,
                                  UInt gran, IROp opSAR128 )
{
   /* The tricky bit is to convert vec0 into a suitable mask, by
      copying the most significant bit of each lane into all positions
      in the lane. */
   IRTemp sh = newTemp(Ity_I8);
   assign(sh, mkU8(8 * gran - 1));

   IRTemp vec0Hi = IRTemp_INVALID;
   IRTemp vec0Lo = IRTemp_INVALID;
   breakupV256toV128s( vec0, &vec0Hi, &vec0Lo );

   IRTemp mask = newTemp(Ity_V256);
   assign(mask, binop(Iop_V128HLtoV256,
                      binop(opSAR128, mkexpr(vec0Hi), mkexpr(sh)),
                      binop(opSAR128, mkexpr(vec0Lo), mkexpr(sh))));

   IRTemp notmask = newTemp(Ity_V256);
   assign(notmask, unop(Iop_NotV256, mkexpr(mask)));

   IRTemp res = newTemp(Ity_V256);
   assign(res,  binop(Iop_OrV256,
                      binop(Iop_AndV256, mkexpr(vecE), mkexpr(mask)),
                      binop(Iop_AndV256, mkexpr(vecG), mkexpr(notmask))));
   return res;
}
static Long dis_VBLENDV_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
                              const HChar *name, UInt gran, IROp opSAR )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);
   UInt   rV     = getVexNvvvv(pfx);
   UInt   rIS4   = 0xFF; /* invalid */
   IRTemp vecE   = newTemp(Ity_V128);
   IRTemp vecV   = newTemp(Ity_V128);
   IRTemp vecIS4 = newTemp(Ity_V128);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(vecE, getXMMReg(rE));
      UChar ib = getUChar(delta);
      rIS4 = (ib >> 4) & 0xF;
      DIP("%s %s,%s,%s,%s\n",
          name, nameXMMReg(rIS4), nameXMMReg(rE),
          nameXMMReg(rV), nameXMMReg(rG));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      assign(vecE, loadLE(Ity_V128, mkexpr(addr)));
      UChar ib = getUChar(delta);
      rIS4 = (ib >> 4) & 0xF;
      DIP("%s %s,%s,%s,%s\n",
          name, nameXMMReg(rIS4), dis_buf, nameXMMReg(rV), nameXMMReg(rG));
   }
   assign(vecV,   getXMMReg(rV));
   assign(vecIS4, getXMMReg(rIS4));
   IRTemp res = math_PBLENDVB_128( vecE, vecV, vecIS4, gran, opSAR );
   putYMMRegLoAndZU( rG, mkexpr(res) );
   return delta;
}
static Long dis_VBLENDV_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
                              const HChar *name, UInt gran, IROp opSAR128 )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);
   UInt   rV     = getVexNvvvv(pfx);
   UInt   rIS4   = 0xFF; /* invalid */
   IRTemp vecE   = newTemp(Ity_V256);
   IRTemp vecV   = newTemp(Ity_V256);
   IRTemp vecIS4 = newTemp(Ity_V256);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(vecE, getYMMReg(rE));
      UChar ib = getUChar(delta);
      rIS4 = (ib >> 4) & 0xF;
      DIP("%s %s,%s,%s,%s\n",
          name, nameYMMReg(rIS4), nameYMMReg(rE),
          nameYMMReg(rV), nameYMMReg(rG));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      assign(vecE, loadLE(Ity_V256, mkexpr(addr)));
      UChar ib = getUChar(delta);
      rIS4 = (ib >> 4) & 0xF;
      DIP("%s %s,%s,%s,%s\n",
          name, nameYMMReg(rIS4), dis_buf, nameYMMReg(rV), nameYMMReg(rG));
   }
   assign(vecV,   getYMMReg(rV));
   assign(vecIS4, getYMMReg(rIS4));
   IRTemp res = math_PBLENDVB_256( vecE, vecV, vecIS4, gran, opSAR128 );
   putYMMReg( rG, mkexpr(res) );
   return delta;
}
static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign )
{
   /* Set Z=1 iff (vecE & vecG) == 0
      Set C=1 iff (vecE & not vecG) == 0
   */

   /* andV, andnV:  vecE & vecG,  vecE and not(vecG) */

   /* andV resp. andnV, reduced to 64-bit values, by or-ing the top
      and bottom 64-bits together.  It relies on this trick:

      InterleaveLO64x2([a,b],[c,d]) == [b,d]    hence

      InterleaveLO64x2([a,b],[a,b]) == [b,b]    and similarly
      InterleaveHI64x2([a,b],[a,b]) == [a,a]

      and so the OR of the above 2 exprs produces
      [a OR b, a OR b], from which we simply take the lower half.
   */
   IRTemp and64  = newTemp(Ity_I64);
   IRTemp andn64 = newTemp(Ity_I64);

   assign(and64,
          unop(Iop_V128to64,
               binop(Iop_OrV128,
                     binop(Iop_InterleaveLO64x2,
                           mkexpr(andV), mkexpr(andV)),
                     binop(Iop_InterleaveHI64x2,
                           mkexpr(andV), mkexpr(andV)))));

   assign(andn64,
          unop(Iop_V128to64,
               binop(Iop_OrV128,
                     binop(Iop_InterleaveLO64x2,
                           mkexpr(andnV), mkexpr(andnV)),
                     binop(Iop_InterleaveHI64x2,
                           mkexpr(andnV), mkexpr(andnV)))));

   IRTemp z64 = newTemp(Ity_I64);
   IRTemp c64 = newTemp(Ity_I64);
   if (sign == 64) {
      /* When only interested in the most significant bit, just shift
         arithmetically right and negate.  */
      assign(z64,
             unop(Iop_Not64,
                  binop(Iop_Sar64, mkexpr(and64), mkU8(63))));
      assign(c64,
             unop(Iop_Not64,
                  binop(Iop_Sar64, mkexpr(andn64), mkU8(63))));
   } else {
      if (sign == 32) {
         /* When interested in bit 31 and bit 63, mask those bits and
            fallthrough into the PTEST handling.  */
         IRTemp t0 = newTemp(Ity_I64);
         IRTemp t1 = newTemp(Ity_I64);
         IRTemp t2 = newTemp(Ity_I64);
         assign(t0, mkU64(0x8000000080000000ULL));
         assign(t1, binop(Iop_And64, mkexpr(and64), mkexpr(t0)));
         assign(t2, binop(Iop_And64, mkexpr(andn64), mkexpr(t0)));
         and64  = t1;
         andn64 = t2;
      }
      /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
         slice out the Z and C bits conveniently.  We use the standard
         trick all-zeroes -> all-zeroes, anything-else -> all-ones
         done by "(x | -x) >>s (word-size - 1)".
      */
      assign(z64,
             unop(Iop_Not64,
                  binop(Iop_Sar64,
                        binop(Iop_Or64,
                              binop(Iop_Sub64, mkU64(0), mkexpr(and64)),
                              mkexpr(and64)), mkU8(63))));
      assign(c64,
             unop(Iop_Not64,
                  binop(Iop_Sar64,
                        binop(Iop_Or64,
                              binop(Iop_Sub64, mkU64(0), mkexpr(andn64)),
                              mkexpr(andn64)), mkU8(63))));
   }

   /* And finally, slice out the Z and C flags and set the flags
      thunk to COPY for them.  OSAP are set to zero. */
   IRTemp newOSZACP = newTemp(Ity_I64);
   assign(newOSZACP,
          binop(Iop_Or64,
                binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)),
                binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C))));

   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP)));
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
}
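/* Illustrative note (added): the "(x | -x) >>s 63" normalisation above
   maps x == 0 to 0 (0 | 0 shifts to 0) and any nonzero x to all-ones,
   since either x or -x must have bit 63 set.  The surrounding Iop_Not64
   therefore yields all-ones exactly when the input was zero, ready to be
   masked with AMD64G_CC_MASK_Z / AMD64G_CC_MASK_C. */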
/* Handles 128 bit versions of PTEST, VTESTPS or VTESTPD.
   sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
static Long dis_xTESTy_128 ( const VexAbiInfo* vbi, Prefix pfx,
                             Long delta, Bool isAvx, Int sign )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx, modrm);
   IRTemp vecE  = newTemp(Ity_V128);
   IRTemp vecG  = newTemp(Ity_V128);

   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(vecE, getXMMReg(rE));
      DIP( "%s%stest%s %s,%s\n",
           isAvx ? "v" : "", sign == 0 ? "p" : "",
           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
           nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      gen_SEGV_if_not_16_aligned( addr );
      assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
      DIP( "%s%stest%s %s,%s\n",
           isAvx ? "v" : "", sign == 0 ? "p" : "",
           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
           dis_buf, nameXMMReg(rG) );
   }

   assign(vecG, getXMMReg(rG));

   /* Set Z=1 iff (vecE & vecG) == 0
      Set C=1 iff (vecE & not vecG) == 0
   */

   /* andV, andnV:  vecE & vecG,  vecE and not(vecG) */
   IRTemp andV  = newTemp(Ity_V128);
   IRTemp andnV = newTemp(Ity_V128);
   assign(andV,  binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
   assign(andnV, binop(Iop_AndV128,
                       mkexpr(vecE),
                       binop(Iop_XorV128, mkexpr(vecG),
                             mkV128(0xFFFF))));

   finish_xTESTy ( andV, andnV, sign );
   return delta;
}
/* Handles 256 bit versions of PTEST, VTESTPS or VTESTPD.
   sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
static Long dis_xTESTy_256 ( const VexAbiInfo* vbi, Prefix pfx,
                             Long delta, Int sign )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx, modrm);
   IRTemp vecE  = newTemp(Ity_V256);
   IRTemp vecG  = newTemp(Ity_V256);

   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign(vecE, getYMMReg(rE));
      DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
           nameYMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(vecE, loadLE( Ity_V256, mkexpr(addr) ));
      DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
           dis_buf, nameYMMReg(rG) );
   }

   assign(vecG, getYMMReg(rG));

   /* Set Z=1 iff (vecE & vecG) == 0
      Set C=1 iff (vecE & not vecG) == 0
   */

   /* andV, andnV:  vecE & vecG,  vecE and not(vecG) */
   IRTemp andV  = newTemp(Ity_V256);
   IRTemp andnV = newTemp(Ity_V256);
   assign(andV,  binop(Iop_AndV256, mkexpr(vecE), mkexpr(vecG)));
   assign(andnV, binop(Iop_AndV256,
                       mkexpr(vecE), unop(Iop_NotV256, mkexpr(vecG))));

   IRTemp andVhi  = IRTemp_INVALID;
   IRTemp andVlo  = IRTemp_INVALID;
   IRTemp andnVhi = IRTemp_INVALID;
   IRTemp andnVlo = IRTemp_INVALID;
   breakupV256toV128s( andV, &andVhi, &andVlo );
   breakupV256toV128s( andnV, &andnVhi, &andnVlo );

   IRTemp andV128  = newTemp(Ity_V128);
   IRTemp andnV128 = newTemp(Ity_V128);
   assign( andV128,  binop( Iop_OrV128, mkexpr(andVhi),  mkexpr(andVlo) ) );
   assign( andnV128, binop( Iop_OrV128, mkexpr(andnVhi), mkexpr(andnVlo) ) );

   finish_xTESTy ( andV128, andnV128, sign );
   return delta;
}
/* Handles 128 bit versions of PMOVZXBW and PMOVSXBW. */
static Long dis_PMOVxXBW_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   const HChar  how = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      DIP( "%spmov%cxbw %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec,
              unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
      DIP( "%spmov%cxbw %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
   }

   IRExpr* res
      = xIsZ /* do math for either zero or sign extend */
        ? binop( Iop_InterleaveLO8x16,
                 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
        : binop( Iop_SarN16x8,
                 binop( Iop_ShlN16x8,
                        binop( Iop_InterleaveLO8x16,
                               IRExpr_Const( IRConst_V128(0) ),
                               mkexpr(srcVec) ),
                        mkU8(8) ),
                 mkU8(8) );

   (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );

   return delta;
}
/* Handles 256 bit versions of PMOVZXBW and PMOVSXBW. */
static Long dis_PMOVxXBW_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   UChar  how    = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      DIP( "vpmov%cxbw %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) );
      DIP( "vpmov%cxbw %s,%s\n", how, dis_buf, nameYMMReg(rG) );
   }

   /* First do zero extend.  */
   IRExpr* res
      = binop( Iop_V128HLtoV256,
               binop( Iop_InterleaveHI8x16,
                      IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
               binop( Iop_InterleaveLO8x16,
                      IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
   /* And if needed sign extension as well.  */
   if (!xIsZ)
      res = binop( Iop_SarN16x16,
                   binop( Iop_ShlN16x16, res, mkU8(8) ), mkU8(8) );

   putYMMReg ( rG, res );

   return delta;
}
static Long dis_PMOVxXWD_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   const HChar  how = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);

   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      DIP( "%spmov%cxwd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec,
              unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
      DIP( "%spmov%cxwd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
   }

   IRExpr* res
      = binop( Iop_InterleaveLO16x8,
               IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) );
   if (!xIsZ)
      res = binop(Iop_SarN32x4,
                  binop(Iop_ShlN32x4, res, mkU8(16)), mkU8(16));

   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( gregOfRexRM(pfx, modrm), res );

   return delta;
}
static Long dis_PMOVxXWD_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   UChar  how    = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);

   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      DIP( "vpmov%cxwd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) );
      DIP( "vpmov%cxwd %s,%s\n", how, dis_buf, nameYMMReg(rG) );
   }

   IRExpr* res
      = binop( Iop_V128HLtoV256,
               binop( Iop_InterleaveHI16x8,
                      IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
               binop( Iop_InterleaveLO16x8,
                      IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
   if (!xIsZ)
      res = binop(Iop_SarN32x8,
                  binop(Iop_ShlN32x8, res, mkU8(16)), mkU8(16));

   putYMMReg ( rG, res );

   return delta;
}
static Long dis_PMOVSXWQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr     = IRTemp_INVALID;
   Int    alen     = 0;
   HChar  dis_buf[50];
   IRTemp srcBytes = newTemp(Ity_I32);
   UChar  modrm    = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   UInt   rG       = gregOfRexRM(pfx, modrm);

   if ( epartIsReg( modrm ) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcBytes, getXMMRegLane32( rE, 0 ) );
      DIP( "%spmovsxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
      DIP( "%spmovsxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
   }

   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, binop( Iop_64HLtoV128,
                   unop( Iop_16Sto64,
                         unop( Iop_32HIto16, mkexpr(srcBytes) ) ),
                   unop( Iop_16Sto64,
                         unop( Iop_32to16, mkexpr(srcBytes) ) ) ) );
   return delta;
}
static Long dis_PMOVSXWQ_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
{
   IRTemp addr     = IRTemp_INVALID;
   Int    alen     = 0;
   HChar  dis_buf[50];
   IRTemp srcBytes = newTemp(Ity_I64);
   UChar  modrm    = getUChar(delta);
   UInt   rG       = gregOfRexRM(pfx, modrm);
   IRTemp s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;

   if ( epartIsReg( modrm ) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcBytes, getXMMRegLane64( rE, 0 ) );
      DIP( "vpmovsxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcBytes, loadLE( Ity_I64, mkexpr(addr) ) );
      DIP( "vpmovsxwq %s,%s\n", dis_buf, nameYMMReg(rG) );
   }

   breakup64to16s( srcBytes, &s3, &s2, &s1, &s0 );
   putYMMReg( rG, binop( Iop_V128HLtoV256,
                         binop( Iop_64HLtoV128,
                                unop( Iop_16Sto64, mkexpr(s3) ),
                                unop( Iop_16Sto64, mkexpr(s2) ) ),
                         binop( Iop_64HLtoV128,
                                unop( Iop_16Sto64, mkexpr(s1) ),
                                unop( Iop_16Sto64, mkexpr(s0) ) ) ) );
   return delta;
}
static Long dis_PMOVZXWQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   UInt   rG     = gregOfRexRM(pfx, modrm);

   if ( epartIsReg( modrm ) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      DIP( "%spmovzxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec,
              unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
      DIP( "%spmovzxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
   }

   IRTemp zeroVec = newTemp( Ity_V128 );
   assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );

   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, binop( Iop_InterleaveLO16x8,
                   mkexpr(zeroVec),
                   binop( Iop_InterleaveLO16x8,
                          mkexpr(zeroVec), mkexpr(srcVec) ) ) );
   return delta;
}
static Long dis_PMOVZXWQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);

   if ( epartIsReg( modrm ) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      DIP( "vpmovzxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec,
              unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
      DIP( "vpmovzxwq %s,%s\n", dis_buf, nameYMMReg(rG) );
   }

   IRTemp zeroVec = newTemp( Ity_V128 );
   assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );

   putYMMReg( rG, binop( Iop_V128HLtoV256,
                         binop( Iop_InterleaveHI16x8,
                                mkexpr(zeroVec),
                                binop( Iop_InterleaveLO16x8,
                                       mkexpr(zeroVec), mkexpr(srcVec) ) ),
                         binop( Iop_InterleaveLO16x8,
                                mkexpr(zeroVec),
                                binop( Iop_InterleaveLO16x8,
                                       mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
   return delta;
}
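/* Note added for clarity: interleaving with an all-zeroes vector is how
   these PMOVZX translations widen lanes.  E.g. InterleaveLO16x8(0, src)
   turns the low four 16-bit lanes of src into four 32-bit lanes, each
   zero-extended; applying it twice, as above, widens the low 16-bit
   lanes all the way to 64 bits. */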
/* Handles 128 bit versions of PMOVZXDQ and PMOVSXDQ. */
static Long dis_PMOVxXDQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcI64 = newTemp(Ity_I64);
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   const HChar  how = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);
   /* Compute both srcI64 -- the value to expand -- and srcVec -- same
      thing in a V128, with arbitrary junk in the top 64 bits.  Use
      one or both of them and let iropt clean up afterwards (as
      usual). */
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      assign( srcI64, unop(Iop_V128to64, mkexpr(srcVec)) );
      DIP( "%spmov%cxdq %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcI64, loadLE(Ity_I64, mkexpr(addr)) );
      assign( srcVec, unop( Iop_64UtoV128, mkexpr(srcI64)) );
      DIP( "%spmov%cxdq %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
   }

   IRExpr* res
      = xIsZ /* do math for either zero or sign extend */
        ? binop( Iop_InterleaveLO32x4,
                 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
        : binop( Iop_64HLtoV128,
                 unop( Iop_32Sto64,
                       unop( Iop_64HIto32, mkexpr(srcI64) ) ),
                 unop( Iop_32Sto64,
                       unop( Iop_64to32, mkexpr(srcI64) ) ) );

   (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );

   return delta;
}
/* Handles 256 bit versions of PMOVZXDQ and PMOVSXDQ. */
static Long dis_PMOVxXDQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   UChar  how    = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);
   /* Compute both srcI64 -- the value to expand -- and srcVec -- same
      thing in a V128, with arbitrary junk in the top 64 bits.  Use
      one or both of them and let iropt clean up afterwards (as
      usual). */
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      DIP( "vpmov%cxdq %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec, loadLE(Ity_V128, mkexpr(addr)) );
      DIP( "vpmov%cxdq %s,%s\n", how, dis_buf, nameYMMReg(rG) );
   }

   IRExpr* res;
   if (xIsZ) {
      res = binop( Iop_V128HLtoV256,
                   binop( Iop_InterleaveHI32x4,
                          IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
                   binop( Iop_InterleaveLO32x4,
                          IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
   } else {
      IRTemp s3, s2, s1, s0;
      s3 = s2 = s1 = s0 = IRTemp_INVALID;
      breakupV128to32s( srcVec, &s3, &s2, &s1, &s0 );
      res = binop( Iop_V128HLtoV256,
                   binop( Iop_64HLtoV128,
                          unop( Iop_32Sto64, mkexpr(s3) ),
                          unop( Iop_32Sto64, mkexpr(s2) ) ),
                   binop( Iop_64HLtoV128,
                          unop( Iop_32Sto64, mkexpr(s1) ),
                          unop( Iop_32Sto64, mkexpr(s0) ) ) );
   }

   putYMMReg ( rG, res );

   return delta;
}
/* Handles 128 bit versions of PMOVZXBD and PMOVSXBD. */
static Long dis_PMOVxXBD_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   const HChar  how = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      DIP( "%spmov%cxbd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec,
              unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
      DIP( "%spmov%cxbd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
   }

   IRTemp zeroVec = newTemp(Ity_V128);
   assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );

   IRExpr* res
      = binop(Iop_InterleaveLO8x16,
              mkexpr(zeroVec),
              binop(Iop_InterleaveLO8x16,
                    mkexpr(zeroVec), mkexpr(srcVec)));
   if (!xIsZ)
      res = binop(Iop_SarN32x4,
                  binop(Iop_ShlN32x4, res, mkU8(24)), mkU8(24));

   (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );

   return delta;
}
/* Handles 256 bit versions of PMOVZXBD and PMOVSXBD. */
static Long dis_PMOVxXBD_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool xIsZ )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   UChar  how    = xIsZ ? 'z' : 's';
   UInt   rG     = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      DIP( "vpmov%cxbd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec,
              unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
      DIP( "vpmov%cxbd %s,%s\n", how, dis_buf, nameYMMReg(rG) );
   }

   IRTemp zeroVec = newTemp(Ity_V128);
   assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );

   IRExpr* res
      = binop( Iop_V128HLtoV256,
               binop(Iop_InterleaveHI8x16,
                     mkexpr(zeroVec),
                     binop(Iop_InterleaveLO8x16,
                           mkexpr(zeroVec), mkexpr(srcVec)) ),
               binop(Iop_InterleaveLO8x16,
                     mkexpr(zeroVec),
                     binop(Iop_InterleaveLO8x16,
                           mkexpr(zeroVec), mkexpr(srcVec)) ) );
   if (!xIsZ)
      res = binop(Iop_SarN32x8,
                  binop(Iop_ShlN32x8, res, mkU8(24)), mkU8(24));

   putYMMReg ( rG, res );

   return delta;
}
/* Handles 128 bit versions of PMOVSXBQ. */
static Long dis_PMOVSXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr     = IRTemp_INVALID;
   Int    alen     = 0;
   HChar  dis_buf[50];
   IRTemp srcBytes = newTemp(Ity_I16);
   UChar  modrm    = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   UInt   rG       = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcBytes, getXMMRegLane16( rE, 0 ) );
      DIP( "%spmovsxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) );
      DIP( "%spmovsxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
   }

   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, binop( Iop_64HLtoV128,
                   unop( Iop_8Sto64,
                         unop( Iop_16HIto8, mkexpr(srcBytes) ) ),
                   unop( Iop_8Sto64,
                         unop( Iop_16to8, mkexpr(srcBytes) ) ) ) );
   return delta;
}
/* Handles 256 bit versions of PMOVSXBQ. */
static Long dis_PMOVSXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   IRTemp addr     = IRTemp_INVALID;
   Int    alen     = 0;
   HChar  dis_buf[50];
   IRTemp srcBytes = newTemp(Ity_I32);
   UChar  modrm    = getUChar(delta);
   UInt   rG       = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcBytes, getXMMRegLane32( rE, 0 ) );
      DIP( "vpmovsxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
      DIP( "vpmovsxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
   }

   putYMMReg
      ( rG, binop( Iop_V128HLtoV256,
                   binop( Iop_64HLtoV128,
                          unop( Iop_8Sto64,
                                unop( Iop_16HIto8,
                                      unop( Iop_32HIto16,
                                            mkexpr(srcBytes) ) ) ),
                          unop( Iop_8Sto64,
                                unop( Iop_16to8,
                                      unop( Iop_32HIto16,
                                            mkexpr(srcBytes) ) ) ) ),
                   binop( Iop_64HLtoV128,
                          unop( Iop_8Sto64,
                                unop( Iop_16HIto8,
                                      unop( Iop_32to16,
                                            mkexpr(srcBytes) ) ) ),
                          unop( Iop_8Sto64,
                                unop( Iop_16to8,
                                      unop( Iop_32to16,
                                            mkexpr(srcBytes) ) ) ) ) ) );
   return delta;
}
/* Handles 128 bit versions of PMOVZXBQ. */
static Long dis_PMOVZXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta, Bool isAvx )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   UInt   rG     = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      DIP( "%spmovzxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec,
              unop( Iop_32UtoV128,
                    unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) ))));
      DIP( "%spmovzxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
   }

   IRTemp zeroVec = newTemp(Ity_V128);
   assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );

   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      ( rG, binop( Iop_InterleaveLO8x16,
                   mkexpr(zeroVec),
                   binop( Iop_InterleaveLO8x16,
                          mkexpr(zeroVec),
                          binop( Iop_InterleaveLO8x16,
                                 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
   return delta;
}
/* Handles 256 bit versions of PMOVZXBQ. */
static Long dis_PMOVZXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   IRTemp srcVec = newTemp(Ity_V128);
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);
   if ( epartIsReg(modrm) ) {
      UInt rE = eregOfRexRM(pfx, modrm);
      assign( srcVec, getXMMReg(rE) );
      DIP( "vpmovzxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( srcVec,
              unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) )));
      DIP( "vpmovzxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
   }

   IRTemp zeroVec = newTemp(Ity_V128);
   assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );

   putYMMReg
      ( rG, binop( Iop_V128HLtoV256,
                   binop( Iop_InterleaveHI8x16,
                          mkexpr(zeroVec),
                          binop( Iop_InterleaveLO8x16,
                                 mkexpr(zeroVec),
                                 binop( Iop_InterleaveLO8x16,
                                        mkexpr(zeroVec), mkexpr(srcVec) ) ) ),
                   binop( Iop_InterleaveLO8x16,
                          mkexpr(zeroVec),
                          binop( Iop_InterleaveLO8x16,
                                 mkexpr(zeroVec),
                                 binop( Iop_InterleaveLO8x16,
                                        mkexpr(zeroVec), mkexpr(srcVec) ) ) )
                 ) );
   return delta;
}
static Long dis_PHMINPOSUW_128 ( const VexAbiInfo* vbi, Prefix pfx,
                                 Long delta, Bool isAvx )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   const HChar* mbV = isAvx ? "v" : "";
   IRTemp sV     = newTemp(Ity_V128);
   IRTemp sHi    = newTemp(Ity_I64);
   IRTemp sLo    = newTemp(Ity_I64);
   IRTemp dLo    = newTemp(Ity_I64);
   UInt   rG     = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      DIP("%sphminposuw %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      gen_SEGV_if_not_16_aligned(addr);
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      DIP("%sphminposuw %s,%s\n", mbV, dis_buf, nameXMMReg(rG));
   }
   assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
   assign( sLo, unop(Iop_V128to64,   mkexpr(sV)) );
   assign( dLo, mkIRExprCCall(
                   Ity_I64, 0/*regparms*/,
                   "amd64g_calculate_sse_phminposuw",
                   &amd64g_calculate_sse_phminposuw,
                   mkIRExprVec_2( mkexpr(sLo), mkexpr(sHi) )
         ));
   (isAvx ? putYMMRegLoAndZU : putXMMReg)
      (rG, unop(Iop_64UtoV128, mkexpr(dLo)));
   return delta;
}
static Long dis_AESx ( const VexAbiInfo* vbi, Prefix pfx,
                       Long delta, Bool isAvx, UChar opc )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   rG     = gregOfRexRM(pfx, modrm);
   UInt   regNoL = 0;
   UInt   regNoR = (isAvx && opc != 0xDB) ? getVexNvvvv(pfx) : rG;

   /* This is a nasty kludge.  We need to pass 2 x V128 to the
      helper.  Since we can't do that, use a dirty
      helper to compute the results directly from the XMM regs in
      the guest state.  That means for the memory case, we need to
      move the left operand into a pseudo-register (XMM16, let's
      call it). */
   if (epartIsReg(modrm)) {
      regNoL = eregOfRexRM(pfx, modrm);
   } else {
      regNoL = 16; /* use XMM16 as an intermediary */
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      /* alignment check needed ???? */
      stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
   }

   void*  fn = &amd64g_dirtyhelper_AES;
   const HChar* nm = "amd64g_dirtyhelper_AES";

   /* Round up the arguments.  Note that this is a kludge -- the
      use of mkU64 rather than mkIRExpr_HWord implies the
      assumption that the host's word size is 64-bit. */
   UInt gstOffD = ymmGuestRegOffset(rG);
   UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
   UInt gstOffR = ymmGuestRegOffset(regNoR);
   IRExpr*  opc4     = mkU64(opc);
   IRExpr*  gstOffDe = mkU64(gstOffD);
   IRExpr*  gstOffLe = mkU64(gstOffL);
   IRExpr*  gstOffRe = mkU64(gstOffR);
   IRExpr** args
      = mkIRExprVec_5( IRExpr_GSPTR(), opc4, gstOffDe, gstOffLe, gstOffRe );

   IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
   /* It's not really a dirty call, but we can't use the clean helper
      mechanism here for the very lame reason that we can't pass 2 x
      V128s by value to a helper.  Hence this roundabout scheme. */
   vex_bzero(&d->fxState, sizeof(d->fxState));
   /* AES{ENC,ENCLAST,DEC,DECLAST} read both registers, and writes
      the second for !isAvx or the third for isAvx.
      AESIMC (0xDB) reads the first register, and writes the second. */
   d->fxState[0].fx     = Ifx_Read;
   d->fxState[0].offset = gstOffL;
   d->fxState[0].size   = sizeof(U128);
   d->fxState[1].offset = gstOffR;
   d->fxState[1].size   = sizeof(U128);
   if (opc == 0xDB)
      d->fxState[1].fx   = Ifx_Write;
   else if (!isAvx || rG == regNoR)
      d->fxState[1].fx   = Ifx_Modify;
   else {
      d->fxState[1].fx     = Ifx_Read;
      d->fxState[2].fx     = Ifx_Write;
      d->fxState[2].offset = gstOffD;
      d->fxState[2].size   = sizeof(U128);
   }

   stmt( IRStmt_Dirty(d) );

   const HChar* opsuf;
   switch (opc) {
      case 0xDC: opsuf = "enc"; break;
      case 0XDD: opsuf = "enclast"; break;
      case 0xDE: opsuf = "dec"; break;
      case 0xDF: opsuf = "declast"; break;
      case 0xDB: opsuf = "imc"; break;
      default: vassert(0);
   }
   DIP("%saes%s %s,%s%s%s\n", isAvx ? "v" : "", opsuf,
       (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
       nameXMMReg(regNoR),
       (isAvx && opc != 0xDB) ? "," : "",
       (isAvx && opc != 0xDB) ? nameXMMReg(rG) : "");

   if (isAvx)
      putYMMRegLane128( rG, 1, mkV128(0) );
   return delta;
}
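/* Note (added commentary): the fxState entries above describe to the IR
   framework which guest-state bytes the dirty call reads and writes, so
   that downstream consumers such as Memcheck can track definedness
   through the helper even though the AES computation itself happens
   outside the generated IR. */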
static Long dis_AESKEYGENASSIST ( const VexAbiInfo* vbi, Prefix pfx,
                                  Long delta, Bool isAvx )
{
   IRTemp addr   = IRTemp_INVALID;
   Int    alen   = 0;
   HChar  dis_buf[50];
   UChar  modrm  = getUChar(delta);
   UInt   regNoL = 0;
   UInt   regNoR = gregOfRexRM(pfx, modrm);
   UChar  imm    = 0;

   /* This is a nasty kludge.  See AESENC et al. instructions. */
   modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      regNoL = eregOfRexRM(pfx, modrm);
      imm = getUChar(delta+1);
   } else {
      regNoL = 16; /* use XMM16 as an intermediary */
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      /* alignment check ???? . */
      stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
      imm = getUChar(delta+alen);
   }

   /* Who ya gonna call?  Presumably not Ghostbusters. */
   void*  fn = &amd64g_dirtyhelper_AESKEYGENASSIST;
   const HChar* nm = "amd64g_dirtyhelper_AESKEYGENASSIST";

   /* Round up the arguments.  Note that this is a kludge -- the
      use of mkU64 rather than mkIRExpr_HWord implies the
      assumption that the host's word size is 64-bit. */
   UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
   UInt gstOffR = ymmGuestRegOffset(regNoR);

   IRExpr*  imme     = mkU64(imm & 0xFF);
   IRExpr*  gstOffLe = mkU64(gstOffL);
   IRExpr*  gstOffRe = mkU64(gstOffR);
   IRExpr** args
      = mkIRExprVec_4( IRExpr_GSPTR(), imme, gstOffLe, gstOffRe );

   IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
   /* It's not really a dirty call, but we can't use the clean helper
      mechanism here for the very lame reason that we can't pass 2 x
      V128s by value to a helper.  Hence this roundabout scheme. */
   vex_bzero(&d->fxState, sizeof(d->fxState));
   d->fxState[0].fx     = Ifx_Read;
   d->fxState[0].offset = gstOffL;
   d->fxState[0].size   = sizeof(U128);
   d->fxState[1].fx     = Ifx_Write;
   d->fxState[1].offset = gstOffR;
   d->fxState[1].size   = sizeof(U128);
   stmt( IRStmt_Dirty(d) );

   DIP("%saeskeygenassist $%x,%s,%s\n", isAvx ? "v" : "", (UInt)imm,
       (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
       nameXMMReg(regNoR));
   if (isAvx)
      putYMMRegLane128( regNoR, 1, mkV128(0) );
   return delta;
}
__attribute__((noinline))
static
Long dis_ESC_0F38__SSE4 ( Bool* decode_OK,
                          const VexAbiInfo* vbi,
                          Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   *decode_OK = False;

   Long  delta = deltaIN;
   UChar opc   = getUChar(delta);

   /* 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128  (byte gran)
      66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128  (float gran)
      66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128  (double gran)
      Blend at various granularities, with XMM0 (implicit operand)
      providing the controlling mask.
   */
   if (have66noF2noF3(pfx) && sz == 2) {
      modrm = getUChar(delta);

      const HChar* nm    = NULL;
      UInt         gran  = 0;
      IROp         opSAR = Iop_INVALID;
      switch (opc) {
         case 0x10:
            nm = "pblendvb"; gran = 1; opSAR = Iop_SarN8x16;
            break;
         case 0x14:
            nm = "blendvps"; gran = 4; opSAR = Iop_SarN32x4;
            break;
         case 0x15:
            nm = "blendvpd"; gran = 8; opSAR = Iop_SarN64x2;
            break;
      }

      IRTemp vecE = newTemp(Ity_V128);
      IRTemp vecG = newTemp(Ity_V128);
      IRTemp vec0 = newTemp(Ity_V128);

      if ( epartIsReg(modrm) ) {
         assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm)));
         DIP( "%s %s,%s\n", nm,
              nameXMMReg( eregOfRexRM(pfx, modrm) ),
              nameXMMReg( gregOfRexRM(pfx, modrm) ) );
      } else {
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned( addr );
         assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
         DIP( "%s %s,%s\n", nm,
              dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
      }

      assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm)));
      assign(vec0, getXMMReg(0));

      IRTemp res = math_PBLENDVB_128( vecE, vecG, vec0, gran, opSAR );
      putXMMReg(gregOfRexRM(pfx, modrm), mkexpr(res));

      goto decode_success;
   }

   /* 66 0F 38 17 /r = PTEST xmm1, xmm2/m128
      Logical compare (set ZF and CF from AND/ANDN of the operands) */
   if (have66noF2noF3(pfx)
       && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
      delta = dis_xTESTy_128( vbi, pfx, delta, False/*!isAvx*/, 0 );
      goto decode_success;
   }

   /* 66 0F 38 20 /r = PMOVSXBW xmm1, xmm2/m64
      Packed Move with Sign Extend from Byte to Word (XMM) */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_PMOVxXBW_128( vbi, pfx, delta,
                                False/*!isAvx*/, False/*!xIsZ*/ );
      goto decode_success;
   }

   /* 66 0F 38 21 /r = PMOVSXBD xmm1, xmm2/m32
      Packed Move with Sign Extend from Byte to DWord (XMM) */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_PMOVxXBD_128( vbi, pfx, delta,
                                False/*!isAvx*/, False/*!xIsZ*/ );
      goto decode_success;
   }

   /* 66 0F 38 22 /r = PMOVSXBQ xmm1, xmm2/m16
      Packed Move with Sign Extend from Byte to QWord (XMM) */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_PMOVSXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
      goto decode_success;
   }

   /* 66 0F 38 23 /r = PMOVSXWD xmm1, xmm2/m64
      Packed Move with Sign Extend from Word to DWord (XMM) */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_PMOVxXWD_128(vbi, pfx, delta,
                               False/*!isAvx*/, False/*!xIsZ*/);
      goto decode_success;
   }

   /* 66 0F 38 24 /r = PMOVSXWQ xmm1, xmm2/m32
      Packed Move with Sign Extend from Word to QWord (XMM) */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_PMOVSXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
      goto decode_success;
   }

   /* 66 0F 38 25 /r = PMOVSXDQ xmm1, xmm2/m64
      Packed Move with Sign Extend from Double Word to Quad Word (XMM) */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
                                False/*!isAvx*/, False/*!xIsZ*/ );
      goto decode_success;
   }

   /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-lanes
      0 x 0 to form lower 64-bit half and lanes 2 x 2 to form upper
      64-bit half */
   /* This is a really poor translation -- could be improved if
      performance critical.  It's a copy-paste of PMULUDQ, too. */
   if (have66noF2noF3(pfx) && sz == 2) {
      IRTemp sV = newTemp(Ity_V128);
      IRTemp dV = newTemp(Ity_V128);
      modrm = getUChar(delta);
      UInt rG = gregOfRexRM(pfx,modrm);
      assign( dV, getXMMReg(rG) );
      if (epartIsReg(modrm)) {
         UInt rE = eregOfRexRM(pfx,modrm);
         assign( sV, getXMMReg(rE) );
         DIP("pmuldq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
      } else {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
         DIP("pmuldq %s,%s\n", dis_buf, nameXMMReg(rG));
      }

      putXMMReg( rG, mkexpr(math_PMULDQ_128( dV, sV )) );
      goto decode_success;
   }

   /* 66 0F 38 29 = PCMPEQQ
      64x2 equality comparison */
   if (have66noF2noF3(pfx) && sz == 2) {
      /* FIXME: this needs an alignment check */
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "pcmpeqq", Iop_CmpEQ64x2, False );
      goto decode_success;
   }

   /* 66 0F 38 2A = MOVNTDQA
      "non-temporal" "streaming" load
      Handle like MOVDQA but only memory operand is allowed */
   if (have66noF2noF3(pfx) && sz == 2) {
      modrm = getUChar(delta);
      if (!epartIsReg(modrm)) {
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned( addr );
         putXMMReg( gregOfRexRM(pfx,modrm),
                    loadLE(Ity_V128, mkexpr(addr)) );
         DIP("movntdqa %s,%s\n", dis_buf,
             nameXMMReg(gregOfRexRM(pfx,modrm)));
         goto decode_success;
      }
   }

   /* 66 0f 38 2B /r = PACKUSDW xmm1, xmm2/m128
      2x 32x4 S->U saturating narrow from xmm2/m128 to xmm1 */
   if (have66noF2noF3(pfx) && sz == 2) {

      modrm = getUChar(delta);

      IRTemp argL = newTemp(Ity_V128);
      IRTemp argR = newTemp(Ity_V128);

      if ( epartIsReg(modrm) ) {
         assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
         DIP( "packusdw %s,%s\n",
              nameXMMReg( eregOfRexRM(pfx, modrm) ),
              nameXMMReg( gregOfRexRM(pfx, modrm) ) );
      } else {
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned( addr );
         assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
         DIP( "packusdw %s,%s\n",
              dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
      }

      assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));

      putXMMReg( gregOfRexRM(pfx, modrm),
                 binop( Iop_QNarrowBin32Sto16Ux8,
                        mkexpr(argL), mkexpr(argR)) );

      goto decode_success;
   }

   /* 66 0F 38 30 /r = PMOVZXBW xmm1, xmm2/m64
      Packed Move with Zero Extend from Byte to Word (XMM) */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_PMOVxXBW_128( vbi, pfx, delta,
                                False/*!isAvx*/, True/*xIsZ*/ );
      goto decode_success;
   }

   /* 66 0F 38 31 /r = PMOVZXBD xmm1, xmm2/m32
      Packed Move with Zero Extend from Byte to DWord (XMM) */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_PMOVxXBD_128( vbi, pfx, delta,
                                False/*!isAvx*/, True/*xIsZ*/ );
      goto decode_success;
   }

   /* 66 0F 38 32 /r = PMOVZXBQ xmm1, xmm2/m16
      Packed Move with Zero Extend from Byte to QWord (XMM) */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_PMOVZXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
      goto decode_success;
   }

   /* 66 0F 38 33 /r = PMOVZXWD xmm1, xmm2/m64
      Packed Move with Zero Extend from Word to DWord (XMM) */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_PMOVxXWD_128( vbi, pfx, delta,
                                False/*!isAvx*/, True/*xIsZ*/ );
      goto decode_success;
   }

   /* 66 0F 38 34 /r = PMOVZXWQ xmm1, xmm2/m32
      Packed Move with Zero Extend from Word to QWord (XMM) */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_PMOVZXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
      goto decode_success;
   }

   /* 66 0F 38 35 /r = PMOVZXDQ xmm1, xmm2/m64
      Packed Move with Zero Extend from DWord to QWord (XMM) */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
                                False/*!isAvx*/, True/*xIsZ*/ );
      goto decode_success;
   }

   /* 66 0F 38 37 = PCMPGTQ
      64x2 comparison (signed, presumably; the Intel docs don't say :-)
   */
   if (have66noF2noF3(pfx) && sz == 2) {
      /* FIXME: this needs an alignment check */
      delta = dis_SSEint_E_to_G( vbi, pfx, delta,
                                 "pcmpgtq", Iop_CmpGT64Sx2, False );
      goto decode_success;
   }

   /* 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128   8Sx16 (signed) min
      66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128   8Sx16 (signed) max
   */
   if (have66noF2noF3(pfx) && sz == 2) {
      /* FIXME: this needs an alignment check */
      Bool isMAX = opc == 0x3C;
      delta = dis_SSEint_E_to_G(
                 vbi, pfx, delta,
                 isMAX ? "pmaxsb" : "pminsb",
                 isMAX ? Iop_Max8Sx16 : Iop_Min8Sx16,
                 False );
      goto decode_success;
   }

   /* 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128
      Minimum of Packed Signed Double Word Integers (XMM)
      66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128
      Maximum of Packed Signed Double Word Integers (XMM)
   */
   if (have66noF2noF3(pfx) && sz == 2) {
      /* FIXME: this needs an alignment check */
      Bool isMAX = opc == 0x3D;
      delta = dis_SSEint_E_to_G(
                 vbi, pfx, delta,
                 isMAX ? "pmaxsd" : "pminsd",
                 isMAX ? Iop_Max32Sx4 : Iop_Min32Sx4,
                 False );
      goto decode_success;
   }

   /* 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128
      Minimum of Packed Unsigned Word Integers (XMM)
      66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128
      Maximum of Packed Unsigned Word Integers (XMM)
   */
   if (have66noF2noF3(pfx) && sz == 2) {
      /* FIXME: this needs an alignment check */
      Bool isMAX = opc == 0x3E;
      delta = dis_SSEint_E_to_G(
                 vbi, pfx, delta,
                 isMAX ? "pmaxuw" : "pminuw",
                 isMAX ? Iop_Max16Ux8 : Iop_Min16Ux8,
                 False );
      goto decode_success;
   }

   /* 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128
      Minimum of Packed Unsigned Doubleword Integers (XMM)
      66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128
      Maximum of Packed Unsigned Doubleword Integers (XMM)
   */
   if (have66noF2noF3(pfx) && sz == 2) {
      /* FIXME: this needs an alignment check */
      Bool isMAX = opc == 0x3F;
      delta = dis_SSEint_E_to_G(
                 vbi, pfx, delta,
                 isMAX ? "pmaxud" : "pminud",
                 isMAX ? Iop_Max32Ux4 : Iop_Min32Ux4,
                 False );
      goto decode_success;
   }

   /* 66 0F 38 40 /r = PMULLD xmm1, xmm2/m128
      32x4 integer multiply from xmm2/m128 to xmm1 */
   if (have66noF2noF3(pfx) && sz == 2) {

      modrm = getUChar(delta);

      IRTemp argL = newTemp(Ity_V128);
      IRTemp argR = newTemp(Ity_V128);

      if ( epartIsReg(modrm) ) {
         assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
         DIP( "pmulld %s,%s\n",
              nameXMMReg( eregOfRexRM(pfx, modrm) ),
              nameXMMReg( gregOfRexRM(pfx, modrm) ) );
      } else {
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         gen_SEGV_if_not_16_aligned( addr );
         assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
         DIP( "pmulld %s,%s\n",
              dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
      }

      assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));

      putXMMReg( gregOfRexRM(pfx, modrm),
                 binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) );

      goto decode_success;
   }

   /* 66 0F 38 41 /r = PHMINPOSUW xmm1, xmm2/m128
      Packed Horizontal Word Minimum from xmm2/m128 to xmm1 */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_PHMINPOSUW_128( vbi, pfx, delta, False/*!isAvx*/ );
      goto decode_success;
   }

   /* 66 0F 38 DC /r = AESENC xmm1, xmm2/m128
               DD /r = AESENCLAST xmm1, xmm2/m128
               DE /r = AESDEC xmm1, xmm2/m128
               DF /r = AESDECLAST xmm1, xmm2/m128

               DB /r = AESIMC xmm1, xmm2/m128 */
   if (have66noF2noF3(pfx) && sz == 2) {
      delta = dis_AESx( vbi, pfx, delta, False/*!isAvx*/, opc );
      goto decode_success;
   }

   /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok)
      F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32
      The decoding on this is a bit unusual.
   */
   if (haveF2noF3(pfx)
       && (opc == 0xF1 || (opc == 0xF0 && !have66(pfx)))) {
      modrm = getUChar(delta);

      if (opc == 0xF0)
         sz = 1;
      else
         vassert(sz == 2 || sz == 4 || sz == 8);

      IRType tyE  = szToITy(sz);
      IRTemp valE = newTemp(tyE);

      if (epartIsReg(modrm)) {
         assign(valE, getIRegE(sz, pfx, modrm));
         DIP("crc32b %s,%s\n", nameIRegE(sz, pfx, modrm),
             nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
      } else {
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
         assign(valE, loadLE(tyE, mkexpr(addr)));
         DIP("crc32b %s,%s\n", dis_buf,
             nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
      }

      /* Somewhat funny getting/putting of the crc32 value, in order
         to ensure that it turns into 64-bit gets and puts.  However,
         mask off the upper 32 bits so as to not get memcheck false
         +ves around the helper call. */
      IRTemp valG0 = newTemp(Ity_I64);
      assign(valG0, binop(Iop_And64, getIRegG(8, pfx, modrm),
                          mkU64(0xFFFFFFFF)));

      const HChar* nm = NULL;
      void*        fn = NULL;
      switch (sz) {
         case 1: nm = "amd64g_calc_crc32b";
                 fn = &amd64g_calc_crc32b; break;
         case 2: nm = "amd64g_calc_crc32w";
                 fn = &amd64g_calc_crc32w; break;
         case 4: nm = "amd64g_calc_crc32l";
                 fn = &amd64g_calc_crc32l; break;
         case 8: nm = "amd64g_calc_crc32q";
                 fn = &amd64g_calc_crc32q; break;
      }
      IRTemp valG1 = newTemp(Ity_I64);
      assign(valG1,
             mkIRExprCCall(Ity_I64, 0/*regparm*/, nm, fn,
                           mkIRExprVec_2(mkexpr(valG0),
                                         widenUto64(mkexpr(valE)))));

      putIRegG(4, pfx, modrm, unop(Iop_64to32, mkexpr(valG1)));
      goto decode_success;
   }

   *decode_OK = False;
   return deltaIN;

  decode_success:
   *decode_OK = True;
   return delta;
}
/*------------------------------------------------------------*/
/*--- Top-level SSE4: dis_ESC_0F3A__SSE4                   ---*/
/*------------------------------------------------------------*/
static Long dis_PEXTRW ( const VexAbiInfo* vbi, Prefix pfx,
                         Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   IRTemp t0    = IRTemp_INVALID;
   IRTemp t1    = IRTemp_INVALID;
   IRTemp t2    = IRTemp_INVALID;
   IRTemp t3    = IRTemp_INVALID;
   UChar  modrm = getUChar(delta);
   Int    alen  = 0;
   HChar  dis_buf[50];
   UInt   rG    = gregOfRexRM(pfx,modrm);
   Int    imm8_20;
   IRTemp xmm_vec = newTemp(Ity_V128);
   IRTemp d16     = newTemp(Ity_I16);
   const HChar* mbV = isAvx ? "v" : "";

   vassert(0==getRexW(pfx)); /* ensured by caller */
   assign( xmm_vec, getXMMReg(rG) );
   breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );

   if ( epartIsReg( modrm ) ) {
      imm8_20 = (Int)(getUChar(delta+1) & 7);
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8_20 = (Int)(getUChar(delta+alen) & 7);
   }

   switch (imm8_20) {
      case 0:  assign(d16, unop(Iop_32to16,   mkexpr(t0))); break;
      case 1:  assign(d16, unop(Iop_32HIto16, mkexpr(t0))); break;
      case 2:  assign(d16, unop(Iop_32to16,   mkexpr(t1))); break;
      case 3:  assign(d16, unop(Iop_32HIto16, mkexpr(t1))); break;
      case 4:  assign(d16, unop(Iop_32to16,   mkexpr(t2))); break;
      case 5:  assign(d16, unop(Iop_32HIto16, mkexpr(t2))); break;
      case 6:  assign(d16, unop(Iop_32to16,   mkexpr(t3))); break;
      case 7:  assign(d16, unop(Iop_32HIto16, mkexpr(t3))); break;
      default: vassert(0);
   }

   if ( epartIsReg( modrm ) ) {
      UInt rE = eregOfRexRM(pfx,modrm);
      putIReg32( rE, unop(Iop_16Uto32, mkexpr(d16)) );
      DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20,
           nameXMMReg( rG ), nameIReg32( rE ) );
   } else {
      storeLE( mkexpr(addr), mkexpr(d16) );
      DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20, nameXMMReg( rG ), dis_buf );
   }
   return delta;
}
static Long dis_PEXTRD ( const VexAbiInfo* vbi, Prefix pfx,
                         Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   IRTemp t0    = IRTemp_INVALID;
   IRTemp t1    = IRTemp_INVALID;
   IRTemp t2    = IRTemp_INVALID;
   IRTemp t3    = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];
   Int    imm8_10;
   IRTemp xmm_vec   = newTemp(Ity_V128);
   IRTemp src_dword = newTemp(Ity_I32);
   const HChar* mbV = isAvx ? "v" : "";

   vassert(0==getRexW(pfx)); /* ensured by caller */
   modrm = getUChar(delta);
   assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
   breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );

   if ( epartIsReg( modrm ) ) {
      imm8_10 = (Int)(getUChar(delta+1) & 3);
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8_10 = (Int)(getUChar(delta+alen) & 3);
   }

   switch ( imm8_10 ) {
      case 0:  assign( src_dword, mkexpr(t0) ); break;
      case 1:  assign( src_dword, mkexpr(t1) ); break;
      case 2:  assign( src_dword, mkexpr(t2) ); break;
      case 3:  assign( src_dword, mkexpr(t3) ); break;
      default: vassert(0);
   }

   if ( epartIsReg( modrm ) ) {
      putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) );
      DIP( "%spextrd $%d, %s,%s\n", mbV, imm8_10,
           nameXMMReg( gregOfRexRM(pfx, modrm) ),
           nameIReg32( eregOfRexRM(pfx, modrm) ) );
   } else {
      storeLE( mkexpr(addr), mkexpr(src_dword) );
      DIP( "%spextrd $%d, %s,%s\n", mbV,
           imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
   }
   return delta;
}
static Long dis_PEXTRQ ( const VexAbiInfo* vbi, Prefix pfx,
                         Long delta, Bool isAvx )
{
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];
   Int    imm8_0;
   IRTemp xmm_vec   = newTemp(Ity_V128);
   IRTemp src_qword = newTemp(Ity_I64);
   const HChar* mbV = isAvx ? "v" : "";

   vassert(1==getRexW(pfx)); /* ensured by caller */
   modrm = getUChar(delta);
   assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );

   if ( epartIsReg( modrm ) ) {
      imm8_0 = (Int)(getUChar(delta+1) & 1);
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8_0 = (Int)(getUChar(delta+alen) & 1);
   }

   switch ( imm8_0 ) {
      case 0:  assign( src_qword, unop(Iop_V128to64,   mkexpr(xmm_vec)) );
               break;
      case 1:  assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) );
               break;
      default: vassert(0);
   }

   if ( epartIsReg( modrm ) ) {
      putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) );
      DIP( "%spextrq $%d, %s,%s\n", mbV, imm8_0,
           nameXMMReg( gregOfRexRM(pfx, modrm) ),
           nameIReg64( eregOfRexRM(pfx, modrm) ) );
   } else {
      storeLE( mkexpr(addr), mkexpr(src_qword) );
      DIP( "%spextrq $%d, %s,%s\n", mbV,
           imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
   }
   return delta;
}
static IRExpr* math_CTZ32(IRExpr *exp)
{
   /* Iop_Ctz32 isn't implemented by the amd64 back end, so use Iop_Ctz64. */
   return unop(Iop_64to32, unop(Iop_Ctz64, unop(Iop_32Uto64, exp)));
}
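/* Worked example (added) for the "validL = (zmaskL ? (1 << ctz(zmaskL))
   : 0) - 1" formulation used in dis_PCMPISTRI_3A below: if zmaskL == 0x8
   (first zero byte of argL at index 3), ctz gives 3 and (1 << 3) - 1 ==
   0x7, i.e. a mask of the byte positions before the terminator.  If
   zmaskL == 0 there is no terminator, the ITE selects 0, and 0 - 1 ==
   0xFFFFFFFF marks all positions valid. */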
18612 static Long
dis_PCMPISTRI_3A ( UChar modrm
, UInt regNoL
, UInt regNoR
,
18613 Long delta
, UChar opc
, UChar imm
,
18616 /* We only handle PCMPISTRI for now */
18617 vassert((opc
& 0x03) == 0x03);
18618 /* And only an immediate byte of 0x38 or 0x3A */
18619 vassert((imm
& ~0x02) == 0x38);
18621 /* FIXME: Is this correct when RegNoL == 16 ? */
18622 IRTemp argL
= newTemp(Ity_V128
);
18623 assign(argL
, getXMMReg(regNoL
));
18624 IRTemp argR
= newTemp(Ity_V128
);
18625 assign(argR
, getXMMReg(regNoR
));
18627 IRTemp zmaskL
= newTemp(Ity_I32
);
18628 assign(zmaskL
, unop(Iop_16Uto32
,
18629 unop(Iop_GetMSBs8x16
,
18630 binop(Iop_CmpEQ8x16
, mkexpr(argL
), mkV128(0)))));
18631 IRTemp zmaskR
= newTemp(Ity_I32
);
18632 assign(zmaskR
, unop(Iop_16Uto32
,
18633 unop(Iop_GetMSBs8x16
,
18634 binop(Iop_CmpEQ8x16
, mkexpr(argR
), mkV128(0)))));
18636 /* We want validL = ~(zmaskL | -zmaskL)
18638 But this formulation kills memcheck's validity tracking when any
18639 bits above the first "1" are invalid. So reformulate as:
18641 validL = (zmaskL ? (1 << ctz(zmaskL)) : 0) - 1
   IRExpr *ctzL = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskL)));

   /* Generate a bool expression which is zero iff the original is
      zero.  Do this carefully so memcheck can propagate validity bits
      correctly. */
   IRTemp zmaskL_zero = newTemp(Ity_I1);
   assign(zmaskL_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskL), mkU32(0)));

   IRTemp validL = newTemp(Ity_I32);
   assign(validL, binop(Iop_Sub32,
                        IRExpr_ITE(mkexpr(zmaskL_zero),
                                   binop(Iop_Shl32, mkU32(1), ctzL),
                                   mkU32(0)),
                        mkU32(1)));

   /* And similarly for validR. */
   IRExpr *ctzR = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskR)));
   IRTemp zmaskR_zero = newTemp(Ity_I1);
   assign(zmaskR_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskR), mkU32(0)));
   IRTemp validR = newTemp(Ity_I32);
   assign(validR, binop(Iop_Sub32,
                        IRExpr_ITE(mkexpr(zmaskR_zero),
                                   binop(Iop_Shl32, mkU32(1), ctzR),
                                   mkU32(0)),
                        mkU32(1)));

   /* Do the actual comparison. */
   IRExpr *boolResII = unop(Iop_16Uto32,
                            unop(Iop_GetMSBs8x16,
                                 binop(Iop_CmpEQ8x16, mkexpr(argL),
                                                      mkexpr(argR))));

   /* Compute boolresII & validL & validR (i.e., if both valid, use
      comparison result) */
   IRExpr *intRes1_a = binop(Iop_And32, boolResII,
                             binop(Iop_And32,
                                   mkexpr(validL), mkexpr(validR)));

   /* Compute ~(validL | validR); i.e., if both invalid, force 1. */
   IRExpr *intRes1_b = unop(Iop_Not32, binop(Iop_Or32,
                                             mkexpr(validL), mkexpr(validR)));
   /* Otherwise, zero. */
   IRExpr *intRes1 = binop(Iop_And32, mkU32(0xFFFF),
                           binop(Iop_Or32, intRes1_a, intRes1_b));

   /* The "0x30" in imm=0x3A means "polarity=3" means XOR validL with
      the comparison result. */
   IRTemp intRes2 = newTemp(Ity_I32);
   assign(intRes2, binop(Iop_And32, mkU32(0xFFFF),
                         binop(Iop_Xor32, intRes1, mkexpr(validL))));

   /* If the 0x40 bit were set in imm=0x3A, we would return the index
      of the msb.  Since it is clear, we return the index of the
      lsb. */
   IRExpr *newECX = math_CTZ32(binop(Iop_Or32,
                                     mkexpr(intRes2), mkU32(0x10000)));
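
   /* Illustrative note (not in the original source): ORing in 0x10000
      means that when intRes2 is zero (no match) the ctz yields 16,
      which is the architected "no match found" ECX value for the
      16 x 8-bit lane case; otherwise ECX becomes the index of the
      lowest set bit of intRes2. */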
   /* And that's our rcx. */
   putIReg32(R_RCX, newECX);

   /* Now for the condition codes... */

   /* C == 0 iff intRes2 == 0 */
   IRExpr *c_bit = IRExpr_ITE( binop(Iop_ExpCmpNE32, mkexpr(intRes2),
                                     mkU32(0)),
                               mkU32(1 << AMD64G_CC_SHIFT_C),
                               mkU32(0));
   /* Z == 1 iff any in argL is 0 */
   IRExpr *z_bit = IRExpr_ITE( mkexpr(zmaskL_zero),
                               mkU32(1 << AMD64G_CC_SHIFT_Z),
                               mkU32(0));
   /* S == 1 iff any in argR is 0 */
   IRExpr *s_bit = IRExpr_ITE( mkexpr(zmaskR_zero),
                               mkU32(1 << AMD64G_CC_SHIFT_S),
                               mkU32(0));
   /* O == IntRes2[0] */
   IRExpr *o_bit = binop(Iop_Shl32, binop(Iop_And32, mkexpr(intRes2),
                                          mkU32(1)),
                         mkU8(AMD64G_CC_SHIFT_O));

   /* Put them all together */
   IRTemp cc = newTemp(Ity_I64);
   assign(cc, widenUto64(binop(Iop_Or32,
                               binop(Iop_Or32, c_bit, z_bit),
                               binop(Iop_Or32, s_bit, o_bit))));
   stmt(IRStmt_Put(OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY)));
   stmt(IRStmt_Put(OFFB_CC_DEP1, mkexpr(cc)));
   stmt(IRStmt_Put(OFFB_CC_DEP2, mkU64(0)));
   stmt(IRStmt_Put(OFFB_CC_NDEP, mkU64(0)));

   return delta;
}
/* This can fail, in which case it returns the original (unchanged)
   delta. */
static Long dis_PCMPxSTRx ( const VexAbiInfo* vbi, Prefix pfx,
                            Long delta, Bool isAvx, UChar opc )
{
   Long   delta0  = delta;
   UInt   isISTRx = opc & 2;
   UInt   isxSTRM = (opc & 1) ^ 1;
   UInt   regNoL  = 0;
   UInt   regNoR  = 0;
   UChar  imm     = 0;
   IRTemp addr    = IRTemp_INVALID;
   Int    alen    = 0;
   HChar  dis_buf[50];

   /* This is a nasty kludge.  We need to pass 2 x V128 to the helper
      (which is clean).  Since we can't do that, use a dirty helper to
      compute the results directly from the XMM regs in the guest
      state.  That means for the memory case, we need to move the left
      operand into a pseudo-register (XMM16, let's call it). */
   UChar modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      regNoL = eregOfRexRM(pfx, modrm);
      regNoR = gregOfRexRM(pfx, modrm);
      imm = getUChar(delta+1);
   } else {
      regNoL = 16; /* use XMM16 as an intermediary */
      regNoR = gregOfRexRM(pfx, modrm);
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      /* No alignment check; I guess that makes sense, given that
         these insns are for dealing with C style strings. */
      stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
      imm = getUChar(delta+alen);
   }

   /* Print the insn here, since dis_PCMPISTRI_3A doesn't do so
      itself. */
   if (regNoL == 16) {
      DIP("%spcmp%cstr%c $%x,%s,%s\n",
          isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
          (UInt)imm, dis_buf, nameXMMReg(regNoR));
   } else {
      DIP("%spcmp%cstr%c $%x,%s,%s\n",
          isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
          (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR));
   }

   /* Handle special case(s). */
   if (imm == 0x3A && isISTRx && !isxSTRM) {
      return dis_PCMPISTRI_3A ( modrm, regNoL, regNoR, delta,
                                opc, imm, dis_buf );
   }

   /* Now we know the XMM reg numbers for the operands, and the
      immediate byte.  Is it one we can actually handle? Throw out any
      cases for which the helper function has not been verified. */
   switch (imm) {
      case 0x00: case 0x02:
      case 0x08: case 0x0A: case 0x0C: case 0x0E:
      case 0x10: case 0x12: case 0x14:
      case 0x18: case 0x1A:
      case 0x30: case 0x34:
      case 0x38: case 0x3A:
      case 0x40: case 0x42: case 0x44: case 0x46:
      case 0x70: case 0x72:
         break;
      // the 16-bit character versions of the above
      case 0x01: case 0x03:
      case 0x09: case 0x0B: case 0x0D:
      case 0x19: case 0x1B:
      case 0x39: case 0x3B:
      case 0x41: case 0x45:
         break;
      default:
         return delta0; /*FAIL*/
   }

   /* Who ya gonna call?  Presumably not Ghostbusters. */
   void*  fn = &amd64g_dirtyhelper_PCMPxSTRx;
   const HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx";

   /* Round up the arguments.  Note that this is a kludge -- the use
      of mkU64 rather than mkIRExpr_HWord implies the assumption that
      the host's word size is 64-bit. */
   UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
   UInt gstOffR = ymmGuestRegOffset(regNoR);

   IRExpr*  opc4_and_imm = mkU64((opc << 8) | (imm & 0xFF));
   IRExpr*  gstOffLe     = mkU64(gstOffL);
   IRExpr*  gstOffRe     = mkU64(gstOffR);
   IRExpr*  edxIN        = isISTRx ? mkU64(0) : getIRegRDX(8);
   IRExpr*  eaxIN        = isISTRx ? mkU64(0) : getIRegRAX(8);
   IRExpr** args
      = mkIRExprVec_6( IRExpr_GSPTR(),
                       opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN );

   IRTemp   resT = newTemp(Ity_I64);
   IRDirty* d    = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args );
   /* It's not really a dirty call, but we can't use the clean helper
      mechanism here for the very lame reason that we can't pass 2 x
      V128s by value to a helper.  Hence this roundabout scheme. */
   d->nFxState = 2;
   vex_bzero(&d->fxState, sizeof(d->fxState));
   d->fxState[0].fx     = Ifx_Read;
   d->fxState[0].offset = gstOffL;
   d->fxState[0].size   = sizeof(U128);
   d->fxState[1].fx     = Ifx_Read;
   d->fxState[1].offset = gstOffR;
   d->fxState[1].size   = sizeof(U128);
   if (isxSTRM) {
      /* Declare that the helper writes XMM0. */
      d->nFxState = 3;
      d->fxState[2].fx     = Ifx_Write;
      d->fxState[2].offset = ymmGuestRegOffset(0);
      d->fxState[2].size   = sizeof(U128);
   }

   stmt( IRStmt_Dirty(d) );

   /* Now resT[15:0] holds the new OSZACP values, so the condition
      codes must be updated.  And for a xSTRI case, resT[31:16] holds
      the new ECX value, so stash that too. */
   if (!isxSTRM) {
      putIReg64(R_RCX, binop(Iop_And64,
                             binop(Iop_Shr64, mkexpr(resT), mkU8(16)),
                             mkU64(0xFFFF)));
   }

   /* Zap the upper half of the dest reg as per AVX conventions. */
   if (isxSTRM && isAvx)
      putYMMRegLane128(/*YMM*/0, 1, mkV128(0));

   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF))
   ));
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   return delta;
}
static IRTemp math_PINSRB_128 ( IRTemp v128, IRTemp u8, UInt imm8 )
{
   vassert(imm8 >= 0 && imm8 <= 15);

   // Create a V128 value which has the selected byte in the
   // specified lane, and zeroes everywhere else.
   IRTemp tmp128    = newTemp(Ity_V128);
   IRTemp halfshift = newTemp(Ity_I64);
   assign(halfshift, binop(Iop_Shl64,
                           unop(Iop_8Uto64, mkexpr(u8)),
                           mkU8(8 * (imm8 & 7))));
   if (imm8 < 8) {
      assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
   } else {
      assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
   }

   UShort mask = ~(1 << imm8);
   IRTemp res  = newTemp(Ity_V128);
   assign( res, binop(Iop_OrV128,
                      mkexpr(tmp128),
                      binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
   return res;
}
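
/* Illustrative example (not in the original source): with imm8 = 5 the
   byte u8 is shifted left by 8*5 = 40 bits into the low 64-bit half
   (since imm8 < 8), and mask = ~(1 << 5) = 0xFFDF clears byte lane 5 of
   v128 before the two values are ORed together. */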
static IRTemp math_PINSRD_128 ( IRTemp v128, IRTemp u32, UInt imm8 )
{
   IRTemp z32 = newTemp(Ity_I32);
   assign(z32, mkU32(0));

   /* Surround u32 with zeroes as per imm, giving us something we can
      OR into a suitably masked-out v128. */
   IRTemp withZs = newTemp(Ity_V128);
   UShort mask = 0;
   switch (imm8) {
      case 3: mask = 0x0FFF;
              assign(withZs, mkV128from32s(u32, z32, z32, z32));
              break;
      case 2: mask = 0xF0FF;
              assign(withZs, mkV128from32s(z32, u32, z32, z32));
              break;
      case 1: mask = 0xFF0F;
              assign(withZs, mkV128from32s(z32, z32, u32, z32));
              break;
      case 0: mask = 0xFFF0;
              assign(withZs, mkV128from32s(z32, z32, z32, u32));
              break;
      default: vassert(0);
   }

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop( Iop_OrV128,
                      mkexpr(withZs),
                      binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
   return res;
}
static IRTemp math_PINSRQ_128 ( IRTemp v128, IRTemp u64, UInt imm8 )
{
   /* Surround u64 with zeroes as per imm, giving us something we can
      OR into a suitably masked-out v128. */
   IRTemp withZs = newTemp(Ity_V128);
   UShort mask = 0;
   if (imm8 == 0) {
      mask = 0xFF00;
      assign(withZs, binop(Iop_64HLtoV128, mkU64(0), mkexpr(u64)));
   } else {
      vassert(imm8 == 1);
      mask = 0x00FF;
      assign( withZs, binop(Iop_64HLtoV128, mkexpr(u64), mkU64(0)));
   }

   IRTemp res = newTemp(Ity_V128);
   assign( res, binop( Iop_OrV128,
                       mkexpr(withZs),
                       binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
   return res;
}
static IRTemp math_INSERTPS ( IRTemp dstV, IRTemp toInsertD, UInt imm8 )
{
   const IRTemp inval = IRTemp_INVALID;
   IRTemp dstDs[4] = { inval, inval, inval, inval };
   breakupV128to32s( dstV, &dstDs[3], &dstDs[2], &dstDs[1], &dstDs[0] );

   vassert(imm8 <= 255);
   dstDs[(imm8 >> 4) & 3] = toInsertD; /* "imm8_count_d" */

   UInt imm8_zmask = (imm8 & 15);
   IRTemp zero_32 = newTemp(Ity_I32);
   assign( zero_32, mkU32(0) );
   IRTemp resV = newTemp(Ity_V128);
   assign( resV, mkV128from32s(
                    ((imm8_zmask & 8) == 8) ? zero_32 : dstDs[3],
                    ((imm8_zmask & 4) == 4) ? zero_32 : dstDs[2],
                    ((imm8_zmask & 2) == 2) ? zero_32 : dstDs[1],
                    ((imm8_zmask & 1) == 1) ? zero_32 : dstDs[0]) );
   return resV;
}
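
/* Illustrative note (not in the original source): bits 5:4 of imm8
   ("count_d") select which dword lane receives toInsertD, and bits 3:0
   ("zmask") force the corresponding lanes to zero.  E.g. imm8 = 0x30
   inserts into lane 3 with no zeroing; imm8 = 0x0E inserts into lane 0
   and zeroes lanes 1..3.  Bits 7:6 ("count_s") are handled by the
   caller, which picks the source lane when the source is a register. */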
static Long dis_PEXTRB_128_GtoE ( const VexAbiInfo* vbi, Prefix pfx,
                                  Long delta, Bool isAvx )
{
   IRTemp addr     = IRTemp_INVALID;
   Int    alen     = 0;
   HChar  dis_buf[50];
   Int    imm8;
   IRTemp xmm_vec  = newTemp(Ity_V128);
   IRTemp sel_lane = newTemp(Ity_I32);
   IRTemp shr_lane = newTemp(Ity_I32);
   const HChar* mbV = isAvx ? "v" : "";
   UChar  modrm    = getUChar(delta);
   IRTemp t3, t2, t1, t0;

   assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
   t3 = t2 = t1 = t0 = IRTemp_INVALID;
   breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );

   if ( epartIsReg( modrm ) ) {
      imm8 = (Int)getUChar(delta+1);
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8 = (Int)getUChar(delta+alen);
   }
   switch ( (imm8 >> 2) & 3 ) {
      case 0: assign( sel_lane, mkexpr(t0) ); break;
      case 1: assign( sel_lane, mkexpr(t1) ); break;
      case 2: assign( sel_lane, mkexpr(t2) ); break;
      case 3: assign( sel_lane, mkexpr(t3) ); break;
      default: vassert(0);
   }
   assign( shr_lane,
           binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) );

   if ( epartIsReg( modrm ) ) {
      putIReg64( eregOfRexRM(pfx,modrm),
                 unop( Iop_32Uto64,
                       binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) );
      DIP( "%spextrb $%d, %s,%s\n", mbV, imm8,
           nameXMMReg( gregOfRexRM(pfx, modrm) ),
           nameIReg64( eregOfRexRM(pfx, modrm) ) );
   } else {
      storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) );
      DIP( "%spextrb $%d,%s,%s\n", mbV,
           imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
   }
   return delta;
}
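
/* Illustrative note (not in the original source): bits 3:2 of imm8 pick
   one of the four 32-bit lanes and bits 1:0 pick the byte within that
   lane (hence the shift by (imm8 & 3)*8).  E.g. imm8 = 6 selects dword
   lane 1 and shifts right by 16, i.e. overall byte lane 6. */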
static IRTemp math_DPPD_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
{
   vassert(imm8 < 256);
   UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF };
   IRTemp and_vec = newTemp(Ity_V128);
   IRTemp sum_vec = newTemp(Ity_V128);
   IRTemp rm      = newTemp(Ity_I32);
   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( and_vec, binop( Iop_AndV128,
                           triop( Iop_Mul64Fx2,
                                  mkexpr(rm),
                                  mkexpr(dst_vec), mkexpr(src_vec) ),
                           mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) );

   assign( sum_vec, binop( Iop_Add64F0x2,
                           binop( Iop_InterleaveHI64x2,
                                  mkexpr(and_vec), mkexpr(and_vec) ),
                           binop( Iop_InterleaveLO64x2,
                                  mkexpr(and_vec), mkexpr(and_vec) ) ) );
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop( Iop_AndV128,
                      binop( Iop_InterleaveLO64x2,
                             mkexpr(sum_vec), mkexpr(sum_vec) ),
                      mkV128( imm8_perms[ (imm8 & 3) ] ) ) );
   return res;
}
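
/* Illustrative example (not in the original source): for DPPD the high
   nibble of imm8 selects which 64-bit products contribute to the sum
   and the low two bits select which result lanes receive it.  With
   imm8 = 0x31, (imm8 >> 4) & 3 = 3 keeps both products, and imm8 & 3 = 1
   writes the sum only to the low lane, leaving the high lane zero. */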
static IRTemp math_DPPS_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
{
   vassert(imm8 < 256);
   IRTemp tmp_prod_vec = newTemp(Ity_V128);
   IRTemp prod_vec     = newTemp(Ity_V128);
   IRTemp sum_vec      = newTemp(Ity_V128);
   IRTemp rm           = newTemp(Ity_I32);
   IRTemp v3, v2, v1, v0;
   v3 = v2 = v1 = v0   = IRTemp_INVALID;
   UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
                             0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
                             0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
                             0xFFFF };

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   assign( tmp_prod_vec,
           binop( Iop_AndV128,
                  triop( Iop_Mul32Fx4,
                         mkexpr(rm), mkexpr(dst_vec), mkexpr(src_vec) ),
                  mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) );
   breakupV128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 );
   assign( prod_vec, mkV128from32s( v3, v1, v2, v0 ) );

   assign( sum_vec, triop( Iop_Add32Fx4,
                           mkexpr(rm),
                           binop( Iop_InterleaveHI32x4,
                                  mkexpr(prod_vec), mkexpr(prod_vec) ),
                           binop( Iop_InterleaveLO32x4,
                                  mkexpr(prod_vec), mkexpr(prod_vec) ) ) );

   IRTemp res = newTemp(Ity_V128);
   assign( res, binop( Iop_AndV128,
                       triop( Iop_Add32Fx4,
                              mkexpr(rm),
                              binop( Iop_InterleaveHI32x4,
                                     mkexpr(sum_vec), mkexpr(sum_vec) ),
                              binop( Iop_InterleaveLO32x4,
                                     mkexpr(sum_vec), mkexpr(sum_vec) ) ),
                       mkV128( imm8_perms[ (imm8 & 15) ] ) ) );
   return res;
}
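
/* Illustrative example (not in the original source): with imm8 = 0xF1
   all four 32-bit products are summed ((imm8 >> 4) & 15 = 0xF) and the
   sum is written only to dword lane 0 (imm8 & 15 = 1); the other lanes
   of the result are zero. */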
static IRTemp math_MPSADBW_128 ( IRTemp dst_vec, IRTemp src_vec, UInt imm8 )
{
   /* Mask out bits of the operands we don't need.  This isn't
      strictly necessary, but it does ensure Memcheck doesn't
      give us any false uninitialised value errors as a
      result. */
   UShort src_mask[4] = { 0x000F, 0x00F0, 0x0F00, 0xF000 };
   UShort dst_mask[2] = { 0x07FF, 0x7FF0 };

   IRTemp src_maskV = newTemp(Ity_V128);
   IRTemp dst_maskV = newTemp(Ity_V128);
   assign(src_maskV, mkV128( src_mask[ imm8 & 3 ] ));
   assign(dst_maskV, mkV128( dst_mask[ (imm8 >> 2) & 1 ] ));

   IRTemp src_masked = newTemp(Ity_V128);
   IRTemp dst_masked = newTemp(Ity_V128);
   assign(src_masked, binop(Iop_AndV128, mkexpr(src_vec), mkexpr(src_maskV)));
   assign(dst_masked, binop(Iop_AndV128, mkexpr(dst_vec), mkexpr(dst_maskV)));

   /* Generate 4 64 bit values that we can hand to a clean helper */
   IRTemp sHi = newTemp(Ity_I64);
   IRTemp sLo = newTemp(Ity_I64);
   assign( sHi, unop(Iop_V128HIto64, mkexpr(src_masked)) );
   assign( sLo, unop(Iop_V128to64,   mkexpr(src_masked)) );

   IRTemp dHi = newTemp(Ity_I64);
   IRTemp dLo = newTemp(Ity_I64);
   assign( dHi, unop(Iop_V128HIto64, mkexpr(dst_masked)) );
   assign( dLo, unop(Iop_V128to64,   mkexpr(dst_masked)) );

   /* Compute halves of the result separately */
   IRTemp resHi = newTemp(Ity_I64);
   IRTemp resLo = newTemp(Ity_I64);

   IRExpr** argsHi
      = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
                       mkU64( 0x80 | (imm8 & 7) ));
   IRExpr** argsLo
      = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
                       mkU64( 0x00 | (imm8 & 7) ));

   assign(resHi, mkIRExprCCall( Ity_I64, 0/*regparm*/,
                                "amd64g_calc_mpsadbw",
                                &amd64g_calc_mpsadbw, argsHi ));
   assign(resLo, mkIRExprCCall( Ity_I64, 0/*regparm*/,
                                "amd64g_calc_mpsadbw",
                                &amd64g_calc_mpsadbw, argsLo ));

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo)));
   return res;
}
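
/* Illustrative note (not in the original source): imm8 & 3 selects
   which 32-bit block of the source is used (src_mask), (imm8 >> 2) & 1
   selects which 11-byte window of the destination is used (dst_mask),
   and imm8 & 7 is forwarded to the helper; the extra 0x80 flag asks the
   helper for the high 64 bits of the result rather than the low. */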
static Long dis_EXTRACTPS ( const VexAbiInfo* vbi, Prefix pfx,
                            Long delta, Bool isAvx )
{
   IRTemp addr = IRTemp_INVALID;
   Int    alen = 0;
   HChar  dis_buf[50];
   UChar  modrm = getUChar(delta);
   Int    imm8_10;
   IRTemp xmm_vec   = newTemp(Ity_V128);
   IRTemp src_dword = newTemp(Ity_I32);
   UInt   rG = gregOfRexRM(pfx,modrm);
   IRTemp t3, t2, t1, t0;
   t3 = t2 = t1 = t0 = IRTemp_INVALID;

   assign( xmm_vec, getXMMReg( rG ) );
   breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );

   if ( epartIsReg( modrm ) ) {
      imm8_10 = (Int)(getUChar(delta+1) & 3);
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8_10 = (Int)(getUChar(delta+alen) & 3);
   }

   switch ( imm8_10 ) {
      case 0: assign( src_dword, mkexpr(t0) ); break;
      case 1: assign( src_dword, mkexpr(t1) ); break;
      case 2: assign( src_dword, mkexpr(t2) ); break;
      case 3: assign( src_dword, mkexpr(t3) ); break;
      default: vassert(0);
   }

   if ( epartIsReg( modrm ) ) {
      UInt rE = eregOfRexRM(pfx,modrm);
      putIReg32( rE, mkexpr(src_dword) );
      DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
           nameXMMReg( rG ), nameIReg32( rE ) );
   } else {
      storeLE( mkexpr(addr), mkexpr(src_dword) );
      DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
           nameXMMReg( rG ), dis_buf );
   }
   return delta;
}
static IRTemp math_PCLMULQDQ( IRTemp dV, IRTemp sV, UInt imm8 )
{
   IRTemp t0 = newTemp(Ity_I64);
   IRTemp t1 = newTemp(Ity_I64);
   assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64,
                   mkexpr(dV)));
   assign(t1, unop((imm8&16) ? Iop_V128HIto64 : Iop_V128to64,
                   mkexpr(sV)));

   IRTemp t2 = newTemp(Ity_I64);
   IRTemp t3 = newTemp(Ity_I64);

   IRExpr** args;

   args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0));
   assign(t2, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
                            &amd64g_calculate_pclmul, args));
   args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1));
   assign(t3, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
                            &amd64g_calculate_pclmul, args));

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)));
   return res;
}
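
/* Illustrative note (not in the original source): bit 0 of imm8 selects
   the low or high qword of dV and bit 4 selects the low or high qword
   of sV.  The helper is then called twice, with its last argument 0 and
   1, to produce the low (t2) and high (t3) 64 bits of the 128-bit
   carry-less product. */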
__attribute__((noinline))
static
Long dis_ESC_0F3A__SSE4 ( Bool* decode_OK,
                          const VexAbiInfo* vbi,
                          Prefix pfx, Int sz, Long deltaIN )
{
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   *decode_OK = False;

   Long  delta = deltaIN;
   UChar opc   = getUChar(delta);
   delta++;
   switch (opc) {

   case 0x08:
19264 if (have66noF2noF3(pfx
) && sz
== 2) {
19266 IRTemp src0
= newTemp(Ity_F32
);
19267 IRTemp src1
= newTemp(Ity_F32
);
19268 IRTemp src2
= newTemp(Ity_F32
);
19269 IRTemp src3
= newTemp(Ity_F32
);
19270 IRTemp res0
= newTemp(Ity_F32
);
19271 IRTemp res1
= newTemp(Ity_F32
);
19272 IRTemp res2
= newTemp(Ity_F32
);
19273 IRTemp res3
= newTemp(Ity_F32
);
19274 IRTemp rm
= newTemp(Ity_I32
);
19277 modrm
= getUChar(delta
);
19279 if (epartIsReg(modrm
)) {
19281 getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 0 ) );
19283 getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 1 ) );
19285 getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 2 ) );
19287 getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 3 ) );
19288 imm
= getUChar(delta
+1);
19289 if (imm
& ~15) goto decode_failure
;
19291 DIP( "roundps $%d,%s,%s\n",
19292 imm
, nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19293 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19295 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19296 gen_SEGV_if_not_16_aligned(addr
);
19297 assign( src0
, loadLE(Ity_F32
,
19298 binop(Iop_Add64
, mkexpr(addr
), mkU64(0) )));
19299 assign( src1
, loadLE(Ity_F32
,
19300 binop(Iop_Add64
, mkexpr(addr
), mkU64(4) )));
19301 assign( src2
, loadLE(Ity_F32
,
19302 binop(Iop_Add64
, mkexpr(addr
), mkU64(8) )));
19303 assign( src3
, loadLE(Ity_F32
,
19304 binop(Iop_Add64
, mkexpr(addr
), mkU64(12) )));
19305 imm
= getUChar(delta
+alen
);
19306 if (imm
& ~15) goto decode_failure
;
19308 DIP( "roundps $%d,%s,%s\n",
19309 imm
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19312 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19313 that encoding is the same as the encoding for IRRoundingMode,
19314 we can use that value directly in the IR as a rounding
19316 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
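
         /* Illustrative note (not in the original source): the Intel RC
            encoding is 0 = nearest-even, 1 = toward -inf, 2 = toward
            +inf, 3 = toward zero, which coincides with IRRoundingMode
            (Irrm_NEAREST .. Irrm_ZERO).  When bit 2 of imm is set, the
            current SSE (MXCSR) rounding mode is used instead. */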
         assign(res0, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src0)) );
         assign(res1, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src1)) );
         assign(res2, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src2)) );
         assign(res3, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src3)) );

         putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
         putXMMRegLane32F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
         putXMMRegLane32F( gregOfRexRM(pfx, modrm), 2, mkexpr(res2) );
         putXMMRegLane32F( gregOfRexRM(pfx, modrm), 3, mkexpr(res3) );

         goto decode_success;
      }
      break;

   case 0x09:
19333 /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */
19334 if (have66noF2noF3(pfx
) && sz
== 2) {
19336 IRTemp src0
= newTemp(Ity_F64
);
19337 IRTemp src1
= newTemp(Ity_F64
);
19338 IRTemp res0
= newTemp(Ity_F64
);
19339 IRTemp res1
= newTemp(Ity_F64
);
19340 IRTemp rm
= newTemp(Ity_I32
);
19343 modrm
= getUChar(delta
);
19345 if (epartIsReg(modrm
)) {
19347 getXMMRegLane64F( eregOfRexRM(pfx
, modrm
), 0 ) );
19349 getXMMRegLane64F( eregOfRexRM(pfx
, modrm
), 1 ) );
19350 imm
= getUChar(delta
+1);
19351 if (imm
& ~15) goto decode_failure
;
19353 DIP( "roundpd $%d,%s,%s\n",
19354 imm
, nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19355 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19357 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19358 gen_SEGV_if_not_16_aligned(addr
);
19359 assign( src0
, loadLE(Ity_F64
,
19360 binop(Iop_Add64
, mkexpr(addr
), mkU64(0) )));
19361 assign( src1
, loadLE(Ity_F64
,
19362 binop(Iop_Add64
, mkexpr(addr
), mkU64(8) )));
19363 imm
= getUChar(delta
+alen
);
19364 if (imm
& ~15) goto decode_failure
;
19366 DIP( "roundpd $%d,%s,%s\n",
19367 imm
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19370 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19371 that encoding is the same as the encoding for IRRoundingMode,
19372 we can use that value directly in the IR as a rounding
19374 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
19376 assign(res0
, binop(Iop_RoundF64toInt
, mkexpr(rm
), mkexpr(src0
)) );
19377 assign(res1
, binop(Iop_RoundF64toInt
, mkexpr(rm
), mkexpr(src1
)) );
19379 putXMMRegLane64F( gregOfRexRM(pfx
, modrm
), 0, mkexpr(res0
) );
19380 putXMMRegLane64F( gregOfRexRM(pfx
, modrm
), 1, mkexpr(res1
) );
19382 goto decode_success
;
19388 /* 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
19389 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
19391 if (have66noF2noF3(pfx
) && sz
== 2) {
19393 Bool isD
= opc
== 0x0B;
19394 IRTemp src
= newTemp(isD
? Ity_F64
: Ity_F32
);
19395 IRTemp res
= newTemp(isD
? Ity_F64
: Ity_F32
);
19398 modrm
= getUChar(delta
);
19400 if (epartIsReg(modrm
)) {
19402 isD
? getXMMRegLane64F( eregOfRexRM(pfx
, modrm
), 0 )
19403 : getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 0 ) );
19404 imm
= getUChar(delta
+1);
19405 if (imm
& ~15) goto decode_failure
;
19407 DIP( "rounds%c $%d,%s,%s\n",
19409 imm
, nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19410 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19412 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19413 assign( src
, loadLE( isD
? Ity_F64
: Ity_F32
, mkexpr(addr
) ));
19414 imm
= getUChar(delta
+alen
);
19415 if (imm
& ~15) goto decode_failure
;
19417 DIP( "rounds%c $%d,%s,%s\n",
19419 imm
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19422 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19423 that encoding is the same as the encoding for IRRoundingMode,
19424 we can use that value directly in the IR as a rounding
19426 assign(res
, binop(isD
? Iop_RoundF64toInt
: Iop_RoundF32toInt
,
19427 (imm
& 4) ? get_sse_roundingmode()
19432 putXMMRegLane64F( gregOfRexRM(pfx
, modrm
), 0, mkexpr(res
) );
19434 putXMMRegLane32F( gregOfRexRM(pfx
, modrm
), 0, mkexpr(res
) );
19436 goto decode_success
;
19441 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8
19442 Blend Packed Single Precision Floating-Point Values (XMM) */
19443 if (have66noF2noF3(pfx
) && sz
== 2) {
19446 IRTemp dst_vec
= newTemp(Ity_V128
);
19447 IRTemp src_vec
= newTemp(Ity_V128
);
19449 modrm
= getUChar(delta
);
19451 assign( dst_vec
, getXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19453 if ( epartIsReg( modrm
) ) {
19454 imm8
= (Int
)getUChar(delta
+1);
19455 assign( src_vec
, getXMMReg( eregOfRexRM(pfx
, modrm
) ) );
19457 DIP( "blendps $%d, %s,%s\n", imm8
,
19458 nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19459 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19461 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19462 1/* imm8 is 1 byte after the amode */ );
19463 gen_SEGV_if_not_16_aligned( addr
);
19464 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19465 imm8
= (Int
)getUChar(delta
+alen
);
            DIP( "blendps $%d, %s,%s\n",
                 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19471 putXMMReg( gregOfRexRM(pfx
, modrm
),
19472 mkexpr( math_BLENDPS_128( src_vec
, dst_vec
, imm8
) ) );
19473 goto decode_success
;
19478 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8
19479 Blend Packed Double Precision Floating-Point Values (XMM) */
19480 if (have66noF2noF3(pfx
) && sz
== 2) {
19483 IRTemp dst_vec
= newTemp(Ity_V128
);
19484 IRTemp src_vec
= newTemp(Ity_V128
);
19486 modrm
= getUChar(delta
);
19487 assign( dst_vec
, getXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19489 if ( epartIsReg( modrm
) ) {
19490 imm8
= (Int
)getUChar(delta
+1);
19491 assign( src_vec
, getXMMReg( eregOfRexRM(pfx
, modrm
) ) );
19493 DIP( "blendpd $%d, %s,%s\n", imm8
,
19494 nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19495 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19497 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19498 1/* imm8 is 1 byte after the amode */ );
19499 gen_SEGV_if_not_16_aligned( addr
);
19500 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19501 imm8
= (Int
)getUChar(delta
+alen
);
19503 DIP( "blendpd $%d, %s,%s\n",
19504 imm8
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19507 putXMMReg( gregOfRexRM(pfx
, modrm
),
19508 mkexpr( math_BLENDPD_128( src_vec
, dst_vec
, imm8
) ) );
19509 goto decode_success
;
19514 /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8
19515 Blend Packed Words (XMM) */
19516 if (have66noF2noF3(pfx
) && sz
== 2) {
19519 IRTemp dst_vec
= newTemp(Ity_V128
);
19520 IRTemp src_vec
= newTemp(Ity_V128
);
19522 modrm
= getUChar(delta
);
19524 assign( dst_vec
, getXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19526 if ( epartIsReg( modrm
) ) {
19527 imm8
= (Int
)getUChar(delta
+1);
19528 assign( src_vec
, getXMMReg( eregOfRexRM(pfx
, modrm
) ) );
19530 DIP( "pblendw $%d, %s,%s\n", imm8
,
19531 nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19532 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19534 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19535 1/* imm8 is 1 byte after the amode */ );
19536 gen_SEGV_if_not_16_aligned( addr
);
19537 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19538 imm8
= (Int
)getUChar(delta
+alen
);
19540 DIP( "pblendw $%d, %s,%s\n",
19541 imm8
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19544 putXMMReg( gregOfRexRM(pfx
, modrm
),
19545 mkexpr( math_PBLENDW_128( src_vec
, dst_vec
, imm8
) ) );
19546 goto decode_success
;
   case 0x14:
      /* 66 0F 3A 14 /r ib = PEXTRB r/m16, xmm, imm8
         Extract Byte from xmm, store in mem or zero-extend + store in gen.reg.
      */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x15:
      /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8
         Extract Word from xmm, store in mem or zero-extend + store in gen.reg.
      */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_PEXTRW( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x16:
      /* 66 no-REX.W 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8
         Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM)
         Note that this insn has the same opcodes as PEXTRQ, but
         here the REX.W bit is _not_ present */
      if (have66noF2noF3(pfx)
          && sz == 2 /* REX.W is _not_ present */) {
         delta = dis_PEXTRD( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8
         Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM)
         Note that this insn has the same opcodes as PEXTRD, but
         here the REX.W bit is present */
      if (have66noF2noF3(pfx)
          && sz == 8 /* REX.W is present */) {
         delta = dis_PEXTRQ( vbi, pfx, delta, False/*!isAvx*/);
         goto decode_success;
      }
      break;

   case 0x17:
      /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract
         float from xmm reg and store in gen.reg or mem.  This is
         identical to PEXTRD, except that REX.W appears to be ignored.
      */
      if (have66noF2noF3(pfx)
          && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
         delta = dis_EXTRACTPS( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   case 0x20:
19604 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8
19605 Extract byte from r32/m8 and insert into xmm1 */
19606 if (have66noF2noF3(pfx
) && sz
== 2) {
19608 IRTemp new8
= newTemp(Ity_I8
);
19609 modrm
= getUChar(delta
);
19610 UInt rG
= gregOfRexRM(pfx
, modrm
);
19611 if ( epartIsReg( modrm
) ) {
19612 UInt rE
= eregOfRexRM(pfx
,modrm
);
19613 imm8
= (Int
)(getUChar(delta
+1) & 0xF);
19614 assign( new8
, unop(Iop_32to8
, getIReg32(rE
)) );
19616 DIP( "pinsrb $%d,%s,%s\n", imm8
,
19617 nameIReg32(rE
), nameXMMReg(rG
) );
19619 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19620 imm8
= (Int
)(getUChar(delta
+alen
) & 0xF);
19621 assign( new8
, loadLE( Ity_I8
, mkexpr(addr
) ) );
19623 DIP( "pinsrb $%d,%s,%s\n",
19624 imm8
, dis_buf
, nameXMMReg(rG
) );
19626 IRTemp src_vec
= newTemp(Ity_V128
);
19627 assign(src_vec
, getXMMReg( gregOfRexRM(pfx
, modrm
) ));
19628 IRTemp res
= math_PINSRB_128( src_vec
, new8
, imm8
);
19629 putXMMReg( rG
, mkexpr(res
) );
19630 goto decode_success
;
19635 /* 66 0F 3A 21 /r ib = INSERTPS imm8, xmm2/m32, xmm1
19636 Insert Packed Single Precision Floating-Point Value (XMM) */
19637 if (have66noF2noF3(pfx
) && sz
== 2) {
19639 IRTemp d2ins
= newTemp(Ity_I32
); /* comes from the E part */
19640 const IRTemp inval
= IRTemp_INVALID
;
19642 modrm
= getUChar(delta
);
19643 UInt rG
= gregOfRexRM(pfx
, modrm
);
19645 if ( epartIsReg( modrm
) ) {
19646 UInt rE
= eregOfRexRM(pfx
, modrm
);
19647 IRTemp vE
= newTemp(Ity_V128
);
19648 assign( vE
, getXMMReg(rE
) );
19649 IRTemp dsE
[4] = { inval
, inval
, inval
, inval
};
19650 breakupV128to32s( vE
, &dsE
[3], &dsE
[2], &dsE
[1], &dsE
[0] );
19651 imm8
= getUChar(delta
+1);
19652 d2ins
= dsE
[(imm8
>> 6) & 3]; /* "imm8_count_s" */
19654 DIP( "insertps $%u, %s,%s\n",
19655 imm8
, nameXMMReg(rE
), nameXMMReg(rG
) );
19657 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19658 assign( d2ins
, loadLE( Ity_I32
, mkexpr(addr
) ) );
19659 imm8
= getUChar(delta
+alen
);
19661 DIP( "insertps $%u, %s,%s\n",
19662 imm8
, dis_buf
, nameXMMReg(rG
) );
19665 IRTemp vG
= newTemp(Ity_V128
);
19666 assign( vG
, getXMMReg(rG
) );
19668 putXMMReg( rG
, mkexpr(math_INSERTPS( vG
, d2ins
, imm8
)) );
19669 goto decode_success
;
19674 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8
19675 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */
19676 if (have66noF2noF3(pfx
)
19677 && sz
== 2 /* REX.W is NOT present */) {
19679 IRTemp src_u32
= newTemp(Ity_I32
);
19680 modrm
= getUChar(delta
);
19681 UInt rG
= gregOfRexRM(pfx
, modrm
);
19683 if ( epartIsReg( modrm
) ) {
19684 UInt rE
= eregOfRexRM(pfx
,modrm
);
19685 imm8_10
= (Int
)(getUChar(delta
+1) & 3);
19686 assign( src_u32
, getIReg32( rE
) );
19688 DIP( "pinsrd $%d, %s,%s\n",
19689 imm8_10
, nameIReg32(rE
), nameXMMReg(rG
) );
19691 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19692 imm8_10
= (Int
)(getUChar(delta
+alen
) & 3);
19693 assign( src_u32
, loadLE( Ity_I32
, mkexpr(addr
) ) );
19695 DIP( "pinsrd $%d, %s,%s\n",
19696 imm8_10
, dis_buf
, nameXMMReg(rG
) );
19699 IRTemp src_vec
= newTemp(Ity_V128
);
19700 assign(src_vec
, getXMMReg( rG
));
19701 IRTemp res_vec
= math_PINSRD_128( src_vec
, src_u32
, imm8_10
);
19702 putXMMReg( rG
, mkexpr(res_vec
) );
19703 goto decode_success
;
19705 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8
19706 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */
19707 if (have66noF2noF3(pfx
)
19708 && sz
== 8 /* REX.W is present */) {
19710 IRTemp src_u64
= newTemp(Ity_I64
);
19711 modrm
= getUChar(delta
);
19712 UInt rG
= gregOfRexRM(pfx
, modrm
);
19714 if ( epartIsReg( modrm
) ) {
19715 UInt rE
= eregOfRexRM(pfx
,modrm
);
19716 imm8_0
= (Int
)(getUChar(delta
+1) & 1);
19717 assign( src_u64
, getIReg64( rE
) );
19719 DIP( "pinsrq $%d, %s,%s\n",
19720 imm8_0
, nameIReg64(rE
), nameXMMReg(rG
) );
19722 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19723 imm8_0
= (Int
)(getUChar(delta
+alen
) & 1);
19724 assign( src_u64
, loadLE( Ity_I64
, mkexpr(addr
) ) );
19726 DIP( "pinsrq $%d, %s,%s\n",
19727 imm8_0
, dis_buf
, nameXMMReg(rG
) );
19730 IRTemp src_vec
= newTemp(Ity_V128
);
19731 assign(src_vec
, getXMMReg( rG
));
19732 IRTemp res_vec
= math_PINSRQ_128( src_vec
, src_u64
, imm8_0
);
19733 putXMMReg( rG
, mkexpr(res_vec
) );
19734 goto decode_success
;
19739 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8
19740 Dot Product of Packed Single Precision Floating-Point Values (XMM) */
19741 if (have66noF2noF3(pfx
) && sz
== 2) {
19742 modrm
= getUChar(delta
);
19744 IRTemp src_vec
= newTemp(Ity_V128
);
19745 IRTemp dst_vec
= newTemp(Ity_V128
);
19746 UInt rG
= gregOfRexRM(pfx
, modrm
);
19747 assign( dst_vec
, getXMMReg( rG
) );
19748 if ( epartIsReg( modrm
) ) {
19749 UInt rE
= eregOfRexRM(pfx
, modrm
);
19750 imm8
= (Int
)getUChar(delta
+1);
19751 assign( src_vec
, getXMMReg(rE
) );
19753 DIP( "dpps $%d, %s,%s\n",
19754 imm8
, nameXMMReg(rE
), nameXMMReg(rG
) );
19756 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19757 1/* imm8 is 1 byte after the amode */ );
19758 gen_SEGV_if_not_16_aligned( addr
);
19759 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19760 imm8
= (Int
)getUChar(delta
+alen
);
19762 DIP( "dpps $%d, %s,%s\n",
19763 imm8
, dis_buf
, nameXMMReg(rG
) );
19765 IRTemp res
= math_DPPS_128( src_vec
, dst_vec
, imm8
);
19766 putXMMReg( rG
, mkexpr(res
) );
19767 goto decode_success
;
19772 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8
19773 Dot Product of Packed Double Precision Floating-Point Values (XMM) */
19774 if (have66noF2noF3(pfx
) && sz
== 2) {
19775 modrm
= getUChar(delta
);
19777 IRTemp src_vec
= newTemp(Ity_V128
);
19778 IRTemp dst_vec
= newTemp(Ity_V128
);
19779 UInt rG
= gregOfRexRM(pfx
, modrm
);
19780 assign( dst_vec
, getXMMReg( rG
) );
19781 if ( epartIsReg( modrm
) ) {
19782 UInt rE
= eregOfRexRM(pfx
, modrm
);
19783 imm8
= (Int
)getUChar(delta
+1);
19784 assign( src_vec
, getXMMReg(rE
) );
19786 DIP( "dppd $%d, %s,%s\n",
19787 imm8
, nameXMMReg(rE
), nameXMMReg(rG
) );
19789 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19790 1/* imm8 is 1 byte after the amode */ );
19791 gen_SEGV_if_not_16_aligned( addr
);
19792 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19793 imm8
= (Int
)getUChar(delta
+alen
);
19795 DIP( "dppd $%d, %s,%s\n",
19796 imm8
, dis_buf
, nameXMMReg(rG
) );
19798 IRTemp res
= math_DPPD_128( src_vec
, dst_vec
, imm8
);
19799 putXMMReg( rG
, mkexpr(res
) );
19800 goto decode_success
;
      /* 66 0F 3A 42 /r ib = MPSADBW xmm1, xmm2/m128, imm8
         Multiple Packed Sums of Absolute Difference (XMM) */
19807 if (have66noF2noF3(pfx
) && sz
== 2) {
19809 IRTemp src_vec
= newTemp(Ity_V128
);
19810 IRTemp dst_vec
= newTemp(Ity_V128
);
19811 modrm
= getUChar(delta
);
19812 UInt rG
= gregOfRexRM(pfx
, modrm
);
19814 assign( dst_vec
, getXMMReg(rG
) );
19816 if ( epartIsReg( modrm
) ) {
19817 UInt rE
= eregOfRexRM(pfx
, modrm
);
19819 imm8
= (Int
)getUChar(delta
+1);
19820 assign( src_vec
, getXMMReg(rE
) );
19822 DIP( "mpsadbw $%d, %s,%s\n", imm8
,
19823 nameXMMReg(rE
), nameXMMReg(rG
) );
19825 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19826 1/* imm8 is 1 byte after the amode */ );
19827 gen_SEGV_if_not_16_aligned( addr
);
19828 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19829 imm8
= (Int
)getUChar(delta
+alen
);
19831 DIP( "mpsadbw $%d, %s,%s\n", imm8
, dis_buf
, nameXMMReg(rG
) );
19834 putXMMReg( rG
, mkexpr( math_MPSADBW_128(dst_vec
, src_vec
, imm8
) ) );
19835 goto decode_success
;
19840 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
19841 * Carry-less multiplication of selected XMM quadwords into XMM
19842 * registers (a.k.a multiplication of polynomials over GF(2))
19844 if (have66noF2noF3(pfx
) && sz
== 2) {
19847 IRTemp svec
= newTemp(Ity_V128
);
19848 IRTemp dvec
= newTemp(Ity_V128
);
19849 modrm
= getUChar(delta
);
19850 UInt rG
= gregOfRexRM(pfx
, modrm
);
19852 assign( dvec
, getXMMReg(rG
) );
19854 if ( epartIsReg( modrm
) ) {
19855 UInt rE
= eregOfRexRM(pfx
, modrm
);
19856 imm8
= (Int
)getUChar(delta
+1);
19857 assign( svec
, getXMMReg(rE
) );
19859 DIP( "pclmulqdq $%d, %s,%s\n", imm8
,
19860 nameXMMReg(rE
), nameXMMReg(rG
) );
19862 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19863 1/* imm8 is 1 byte after the amode */ );
19864 gen_SEGV_if_not_16_aligned( addr
);
19865 assign( svec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19866 imm8
= (Int
)getUChar(delta
+alen
);
19868 DIP( "pclmulqdq $%d, %s,%s\n",
19869 imm8
, dis_buf
, nameXMMReg(rG
) );
19872 putXMMReg( rG
, mkexpr( math_PCLMULQDQ(dvec
, svec
, imm8
) ) );
19873 goto decode_success
;
   case 0x60:
   case 0x61:
   case 0x62:
   case 0x63:
      /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1
         66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1
         66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1
         66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1
         (selected special cases that actually occur in glibc,
          not by any means a complete implementation.)
      */
      if (have66noF2noF3(pfx) && sz == 2) {
         Long delta0 = delta;
         delta = dis_PCMPxSTRx( vbi, pfx, delta, False/*!isAvx*/, opc );
         if (delta > delta0) goto decode_success;
         /* else fall through; dis_PCMPxSTRx failed to decode it */
      }
      break;

   case 0xDF:
      /* 66 0F 3A DF /r ib = AESKEYGENASSIST imm8, xmm2/m128, xmm1 */
      if (have66noF2noF3(pfx) && sz == 2) {
         delta = dis_AESKEYGENASSIST( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;

   default:
      break;

   }

  decode_failure:
   *decode_OK = False;
   return deltaIN;

  decode_success:
   *decode_OK = True;
   return delta;
}


/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level post-escape decoders: dis_ESC_NONE         ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/
__attribute__((noinline))
static
Long dis_ESC_NONE (
        /*MB_OUT*/DisResult* dres,
        /*MB_OUT*/Bool*      expect_CAS,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  vbi,
        Prefix pfx, Int sz, Long deltaIN
     )
{
   Long   d64   = 0;
   IRTemp addr  = IRTemp_INVALID;
   IRTemp t1    = IRTemp_INVALID;
   IRTemp t2    = IRTemp_INVALID;
   IRTemp t3    = IRTemp_INVALID;
   IRTemp t4    = IRTemp_INVALID;
   IRTemp t5    = IRTemp_INVALID;
   IRType ty    = Ity_INVALID;
   UChar  modrm = 0;
   Int    am_sz = 0;
   Int    d_sz  = 0;
   Int    alen  = 0;
   HChar  dis_buf[50];

   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
   /* delta now points at the modrm byte.  In most of the cases that
      follow, neither the F2 nor F3 prefixes are allowed.  However,
      for some basic arithmetic operations we have to allow F2/XACQ or
      F3/XREL in the case where the destination is memory and the LOCK
      prefix is also present.  Do this check by looking at the modrm
      byte but not advancing delta over it. */
   /* By default, F2 and F3 are not allowed, so let's start off with
      that. */
   Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
   { UChar tmp_modrm = getUChar(delta);
     switch (opc) {
        case 0x00: /* ADD Gb,Eb */  case 0x01: /* ADD Gv,Ev */
        case 0x08: /* OR Gb,Eb */   case 0x09: /* OR Gv,Ev */
        case 0x10: /* ADC Gb,Eb */  case 0x11: /* ADC Gv,Ev */
        case 0x18: /* SBB Gb,Eb */  case 0x19: /* SBB Gv,Ev */
        case 0x20: /* AND Gb,Eb */  case 0x21: /* AND Gv,Ev */
        case 0x28: /* SUB Gb,Eb */  case 0x29: /* SUB Gv,Ev */
        case 0x30: /* XOR Gb,Eb */  case 0x31: /* XOR Gv,Ev */
           if (!epartIsReg(tmp_modrm)
               && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
              /* dst is mem, and we have F2 or F3 but not both */
              validF2orF3 = True;
           }
           break;
        default:
           break;
     }
   }
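
   /* Illustrative example (not in the original source): an instruction
      like "xacquire lock addl $1, (%rax)" carries an F2 prefix together
      with LOCK and a memory destination, so it passes this check,
      whereas an F2 prefix on a register-destination ADD does not. */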
   /* Now, in the switch below, for the opc values examined by the
      switch above, use validF2orF3 rather than looking at pfx
      directly. */
   switch (opc) {

19990 case 0x00: /* ADD Gb,Eb */
19991 if (!validF2orF3
) goto decode_failure
;
19992 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Add8
, WithFlagNone
, True
, 1, delta
, "add" );
19994 case 0x01: /* ADD Gv,Ev */
19995 if (!validF2orF3
) goto decode_failure
;
19996 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Add8
, WithFlagNone
, True
, sz
, delta
, "add" );
19999 case 0x02: /* ADD Eb,Gb */
20000 if (haveF2orF3(pfx
)) goto decode_failure
;
20001 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagNone
, True
, 1, delta
, "add" );
20003 case 0x03: /* ADD Ev,Gv */
20004 if (haveF2orF3(pfx
)) goto decode_failure
;
20005 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagNone
, True
, sz
, delta
, "add" );
20008 case 0x04: /* ADD Ib, AL */
20009 if (haveF2orF3(pfx
)) goto decode_failure
;
20010 delta
= dis_op_imm_A( 1, False
, Iop_Add8
, True
, delta
, "add" );
20012 case 0x05: /* ADD Iv, eAX */
20013 if (haveF2orF3(pfx
)) goto decode_failure
;
20014 delta
= dis_op_imm_A(sz
, False
, Iop_Add8
, True
, delta
, "add" );
20017 case 0x08: /* OR Gb,Eb */
20018 if (!validF2orF3
) goto decode_failure
;
20019 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Or8
, WithFlagNone
, True
, 1, delta
, "or" );
20021 case 0x09: /* OR Gv,Ev */
20022 if (!validF2orF3
) goto decode_failure
;
20023 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Or8
, WithFlagNone
, True
, sz
, delta
, "or" );
20026 case 0x0A: /* OR Eb,Gb */
20027 if (haveF2orF3(pfx
)) goto decode_failure
;
20028 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Or8
, WithFlagNone
, True
, 1, delta
, "or" );
20030 case 0x0B: /* OR Ev,Gv */
20031 if (haveF2orF3(pfx
)) goto decode_failure
;
20032 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Or8
, WithFlagNone
, True
, sz
, delta
, "or" );
20035 case 0x0C: /* OR Ib, AL */
20036 if (haveF2orF3(pfx
)) goto decode_failure
;
20037 delta
= dis_op_imm_A( 1, False
, Iop_Or8
, True
, delta
, "or" );
20039 case 0x0D: /* OR Iv, eAX */
20040 if (haveF2orF3(pfx
)) goto decode_failure
;
20041 delta
= dis_op_imm_A( sz
, False
, Iop_Or8
, True
, delta
, "or" );
20044 case 0x10: /* ADC Gb,Eb */
20045 if (!validF2orF3
) goto decode_failure
;
20046 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Add8
, WithFlagCarry
, True
, 1, delta
, "adc" );
20048 case 0x11: /* ADC Gv,Ev */
20049 if (!validF2orF3
) goto decode_failure
;
20050 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Add8
, WithFlagCarry
, True
, sz
, delta
, "adc" );
20053 case 0x12: /* ADC Eb,Gb */
20054 if (haveF2orF3(pfx
)) goto decode_failure
;
20055 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagCarry
, True
, 1, delta
, "adc" );
20057 case 0x13: /* ADC Ev,Gv */
20058 if (haveF2orF3(pfx
)) goto decode_failure
;
20059 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagCarry
, True
, sz
, delta
, "adc" );
20062 case 0x14: /* ADC Ib, AL */
20063 if (haveF2orF3(pfx
)) goto decode_failure
;
20064 delta
= dis_op_imm_A( 1, True
, Iop_Add8
, True
, delta
, "adc" );
20066 case 0x15: /* ADC Iv, eAX */
20067 if (haveF2orF3(pfx
)) goto decode_failure
;
20068 delta
= dis_op_imm_A( sz
, True
, Iop_Add8
, True
, delta
, "adc" );
20071 case 0x18: /* SBB Gb,Eb */
20072 if (!validF2orF3
) goto decode_failure
;
20073 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagCarry
, True
, 1, delta
, "sbb" );
20075 case 0x19: /* SBB Gv,Ev */
20076 if (!validF2orF3
) goto decode_failure
;
20077 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagCarry
, True
, sz
, delta
, "sbb" );
20080 case 0x1A: /* SBB Eb,Gb */
20081 if (haveF2orF3(pfx
)) goto decode_failure
;
20082 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagCarry
, True
, 1, delta
, "sbb" );
20084 case 0x1B: /* SBB Ev,Gv */
20085 if (haveF2orF3(pfx
)) goto decode_failure
;
20086 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagCarry
, True
, sz
, delta
, "sbb" );
20089 case 0x1C: /* SBB Ib, AL */
20090 if (haveF2orF3(pfx
)) goto decode_failure
;
20091 delta
= dis_op_imm_A( 1, True
, Iop_Sub8
, True
, delta
, "sbb" );
20093 case 0x1D: /* SBB Iv, eAX */
20094 if (haveF2orF3(pfx
)) goto decode_failure
;
20095 delta
= dis_op_imm_A( sz
, True
, Iop_Sub8
, True
, delta
, "sbb" );
20098 case 0x20: /* AND Gb,Eb */
20099 if (!validF2orF3
) goto decode_failure
;
20100 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_And8
, WithFlagNone
, True
, 1, delta
, "and" );
20102 case 0x21: /* AND Gv,Ev */
20103 if (!validF2orF3
) goto decode_failure
;
20104 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_And8
, WithFlagNone
, True
, sz
, delta
, "and" );
20107 case 0x22: /* AND Eb,Gb */
20108 if (haveF2orF3(pfx
)) goto decode_failure
;
20109 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_And8
, WithFlagNone
, True
, 1, delta
, "and" );
20111 case 0x23: /* AND Ev,Gv */
20112 if (haveF2orF3(pfx
)) goto decode_failure
;
20113 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_And8
, WithFlagNone
, True
, sz
, delta
, "and" );
20116 case 0x24: /* AND Ib, AL */
20117 if (haveF2orF3(pfx
)) goto decode_failure
;
20118 delta
= dis_op_imm_A( 1, False
, Iop_And8
, True
, delta
, "and" );
20120 case 0x25: /* AND Iv, eAX */
20121 if (haveF2orF3(pfx
)) goto decode_failure
;
20122 delta
= dis_op_imm_A( sz
, False
, Iop_And8
, True
, delta
, "and" );
20125 case 0x28: /* SUB Gb,Eb */
20126 if (!validF2orF3
) goto decode_failure
;
20127 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, True
, 1, delta
, "sub" );
20129 case 0x29: /* SUB Gv,Ev */
20130 if (!validF2orF3
) goto decode_failure
;
20131 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, True
, sz
, delta
, "sub" );
20134 case 0x2A: /* SUB Eb,Gb */
20135 if (haveF2orF3(pfx
)) goto decode_failure
;
20136 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, True
, 1, delta
, "sub" );
20138 case 0x2B: /* SUB Ev,Gv */
20139 if (haveF2orF3(pfx
)) goto decode_failure
;
20140 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, True
, sz
, delta
, "sub" );
20143 case 0x2C: /* SUB Ib, AL */
20144 if (haveF2orF3(pfx
)) goto decode_failure
;
20145 delta
= dis_op_imm_A(1, False
, Iop_Sub8
, True
, delta
, "sub" );
20147 case 0x2D: /* SUB Iv, eAX */
20148 if (haveF2orF3(pfx
)) goto decode_failure
;
20149 delta
= dis_op_imm_A( sz
, False
, Iop_Sub8
, True
, delta
, "sub" );
20152 case 0x30: /* XOR Gb,Eb */
20153 if (!validF2orF3
) goto decode_failure
;
20154 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Xor8
, WithFlagNone
, True
, 1, delta
, "xor" );
20156 case 0x31: /* XOR Gv,Ev */
20157 if (!validF2orF3
) goto decode_failure
;
20158 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Xor8
, WithFlagNone
, True
, sz
, delta
, "xor" );
20161 case 0x32: /* XOR Eb,Gb */
20162 if (haveF2orF3(pfx
)) goto decode_failure
;
20163 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Xor8
, WithFlagNone
, True
, 1, delta
, "xor" );
20165 case 0x33: /* XOR Ev,Gv */
20166 if (haveF2orF3(pfx
)) goto decode_failure
;
20167 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Xor8
, WithFlagNone
, True
, sz
, delta
, "xor" );
20170 case 0x34: /* XOR Ib, AL */
20171 if (haveF2orF3(pfx
)) goto decode_failure
;
20172 delta
= dis_op_imm_A( 1, False
, Iop_Xor8
, True
, delta
, "xor" );
20174 case 0x35: /* XOR Iv, eAX */
20175 if (haveF2orF3(pfx
)) goto decode_failure
;
20176 delta
= dis_op_imm_A( sz
, False
, Iop_Xor8
, True
, delta
, "xor" );
20179 case 0x38: /* CMP Gb,Eb */
20180 if (haveF2orF3(pfx
)) goto decode_failure
;
20181 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, False
, 1, delta
, "cmp" );
20183 case 0x39: /* CMP Gv,Ev */
20184 if (haveF2orF3(pfx
)) goto decode_failure
;
20185 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, False
, sz
, delta
, "cmp" );
20188 case 0x3A: /* CMP Eb,Gb */
20189 if (haveF2orF3(pfx
)) goto decode_failure
;
20190 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, False
, 1, delta
, "cmp" );
20192 case 0x3B: /* CMP Ev,Gv */
20193 if (haveF2orF3(pfx
)) goto decode_failure
;
20194 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, False
, sz
, delta
, "cmp" );
20197 case 0x3C: /* CMP Ib, AL */
20198 if (haveF2orF3(pfx
)) goto decode_failure
;
20199 delta
= dis_op_imm_A( 1, False
, Iop_Sub8
, False
, delta
, "cmp" );
20201 case 0x3D: /* CMP Iv, eAX */
20202 if (haveF2orF3(pfx
)) goto decode_failure
;
20203 delta
= dis_op_imm_A( sz
, False
, Iop_Sub8
, False
, delta
, "cmp" );
20206 case 0x50: /* PUSH eAX */
20207 case 0x51: /* PUSH eCX */
20208 case 0x52: /* PUSH eDX */
20209 case 0x53: /* PUSH eBX */
20210 case 0x55: /* PUSH eBP */
20211 case 0x56: /* PUSH eSI */
20212 case 0x57: /* PUSH eDI */
20213 case 0x54: /* PUSH eSP */
20214 /* This is the Right Way, in that the value to be pushed is
20215 established before %rsp is changed, so that pushq %rsp
20216 correctly pushes the old value. */
20217 if (haveF2orF3(pfx
)) goto decode_failure
;
20218 vassert(sz
== 2 || sz
== 4 || sz
== 8);
20220 sz
= 8; /* there is no encoding for 32-bit push in 64-bit mode */
20221 ty
= sz
==2 ? Ity_I16
: Ity_I64
;
20223 t2
= newTemp(Ity_I64
);
20224 assign(t1
, getIRegRexB(sz
, pfx
, opc
-0x50));
20225 assign(t2
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(sz
)));
20226 putIReg64(R_RSP
, mkexpr(t2
) );
20227 storeLE(mkexpr(t2
),mkexpr(t1
));
20228 DIP("push%c %s\n", nameISize(sz
), nameIRegRexB(sz
,pfx
,opc
-0x50));
20231 case 0x58: /* POP eAX */
20232 case 0x59: /* POP eCX */
20233 case 0x5A: /* POP eDX */
20234 case 0x5B: /* POP eBX */
20235 case 0x5D: /* POP eBP */
20236 case 0x5E: /* POP eSI */
20237 case 0x5F: /* POP eDI */
20238 case 0x5C: /* POP eSP */
20239 if (haveF2orF3(pfx
)) goto decode_failure
;
20240 vassert(sz
== 2 || sz
== 4 || sz
== 8);
20242 sz
= 8; /* there is no encoding for 32-bit pop in 64-bit mode */
20243 t1
= newTemp(szToITy(sz
));
20244 t2
= newTemp(Ity_I64
);
20245 assign(t2
, getIReg64(R_RSP
));
20246 assign(t1
, loadLE(szToITy(sz
),mkexpr(t2
)));
20247 putIReg64(R_RSP
, binop(Iop_Add64
, mkexpr(t2
), mkU64(sz
)));
20248 putIRegRexB(sz
, pfx
, opc
-0x58, mkexpr(t1
));
20249 DIP("pop%c %s\n", nameISize(sz
), nameIRegRexB(sz
,pfx
,opc
-0x58));
20252 case 0x63: /* MOVSX */
20253 if (haveF2orF3(pfx
)) goto decode_failure
;
20254 if (haveREX(pfx
) && 1==getRexW(pfx
)) {
20256 /* movsx r/m32 to r64 */
20257 modrm
= getUChar(delta
);
20258 if (epartIsReg(modrm
)) {
20260 putIRegG(8, pfx
, modrm
,
20262 getIRegE(4, pfx
, modrm
)));
20263 DIP("movslq %s,%s\n",
20264 nameIRegE(4, pfx
, modrm
),
20265 nameIRegG(8, pfx
, modrm
));
20268 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
20270 putIRegG(8, pfx
, modrm
,
20272 loadLE(Ity_I32
, mkexpr(addr
))));
20273 DIP("movslq %s,%s\n", dis_buf
,
20274 nameIRegG(8, pfx
, modrm
));
20278 goto decode_failure
;
20281 case 0x68: /* PUSH Iv */
20282 if (haveF2orF3(pfx
)) goto decode_failure
;
20283 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
20284 if (sz
== 4) sz
= 8;
20285 d64
= getSDisp(imin(4,sz
),delta
);
20286 delta
+= imin(4,sz
);
20289 case 0x69: /* IMUL Iv, Ev, Gv */
20290 if (haveF2orF3(pfx
)) goto decode_failure
;
20291 delta
= dis_imul_I_E_G ( vbi
, pfx
, sz
, delta
, sz
);
20294 case 0x6A: /* PUSH Ib, sign-extended to sz */
20295 if (haveF2orF3(pfx
)) goto decode_failure
;
20296 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
20297 if (sz
== 4) sz
= 8;
20298 d64
= getSDisp8(delta
); delta
+= 1;
20302 t1
= newTemp(Ity_I64
);
20304 assign( t1
, binop(Iop_Sub64
,getIReg64(R_RSP
),mkU64(sz
)) );
20305 putIReg64(R_RSP
, mkexpr(t1
) );
20306 /* stop mkU16 asserting if d32 is a negative 16-bit number
20310 storeLE( mkexpr(t1
), mkU(ty
,d64
) );
20311 DIP("push%c $%lld\n", nameISize(sz
), (Long
)d64
);
20314 case 0x6B: /* IMUL Ib, Ev, Gv */
20315 delta
= dis_imul_I_E_G ( vbi
, pfx
, sz
, delta
, 1 );
20320 case 0x72: /* JBb/JNAEb (jump below) */
20321 case 0x73: /* JNBb/JAEb (jump not below) */
20322 case 0x74: /* JZb/JEb (jump zero) */
20323 case 0x75: /* JNZb/JNEb (jump not zero) */
20324 case 0x76: /* JBEb/JNAb (jump below or equal) */
20325 case 0x77: /* JNBEb/JAb (jump not below or equal) */
20326 case 0x78: /* JSb (jump negative) */
   case 0x79: /* JNSb (jump not negative) */
20328 case 0x7A: /* JP (jump parity even) */
20329 case 0x7B: /* JNP/JPO (jump parity odd) */
20330 case 0x7C: /* JLb/JNGEb (jump less) */
20331 case 0x7D: /* JGEb/JNLb (jump greater or equal) */
20332 case 0x7E: /* JLEb/JNGb (jump less or equal) */
20333 case 0x7F: { /* JGb/JNLEb (jump greater) */
20335 const HChar
* comment
= "";
20336 if (haveF3(pfx
)) goto decode_failure
;
20337 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
20338 jmpDelta
= getSDisp8(delta
);
20339 vassert(-128 <= jmpDelta
&& jmpDelta
< 128);
20340 d64
= (guest_RIP_bbstart
+delta
+1) + jmpDelta
;
20343 && vex_control
.guest_chase_cond
20344 && (Addr64
)d64
!= (Addr64
)guest_RIP_bbstart
20346 && resteerOkFn( callback_opaque
, (Addr64
)d64
) ) {
20347 /* Speculation: assume this backward branch is taken. So we
20348 need to emit a side-exit to the insn following this one,
20349 on the negation of the condition, and continue at the
20350 branch target address (d64). If we wind up back at the
20351 first instruction of the trace, just stop; it's better to
20352 let the IR loop unroller handle that case. */
20354 mk_amd64g_calculate_condition(
20355 (AMD64Condcode
)(1 ^ (opc
- 0x70))),
20357 IRConst_U64(guest_RIP_bbstart
+delta
),
20359 dres
->whatNext
= Dis_ResteerC
;
20360 dres
->continueAt
= d64
;
20361 comment
= "(assumed taken)";
20365 && vex_control
.guest_chase_cond
20366 && (Addr64
)d64
!= (Addr64
)guest_RIP_bbstart
20368 && resteerOkFn( callback_opaque
, guest_RIP_bbstart
+delta
) ) {
20369 /* Speculation: assume this forward branch is not taken. So
20370 we need to emit a side-exit to d64 (the dest) and continue
20371 disassembling at the insn immediately following this
20374 mk_amd64g_calculate_condition((AMD64Condcode
)(opc
- 0x70)),
20378 dres
->whatNext
= Dis_ResteerC
;
20379 dres
->continueAt
= guest_RIP_bbstart
+delta
;
20380 comment
= "(assumed not taken)";
20383 /* Conservative default translation - end the block at this
20385 jcc_01( dres
, (AMD64Condcode
)(opc
- 0x70),
20386 guest_RIP_bbstart
+delta
, d64
);
20387 vassert(dres
->whatNext
== Dis_StopHere
);
20389 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc
- 0x70), (ULong
)d64
,
   case 0x80: /* Grp1 Ib,Eb */
      modrm = getUChar(delta);
      /* Disallow F2/XACQ and F3/XREL for the non-mem case.  Allow
         just one for the mem case and also require LOCK in this case.
         Note that this erroneously allows XACQ/XREL on CMP since we
         don't check the subopcode here.  No big deal. */
      if (epartIsReg(modrm) && haveF2orF3(pfx))
         goto decode_failure;
      if (!epartIsReg(modrm) && haveF2andF3(pfx))
         goto decode_failure;
      if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx))
         goto decode_failure;
      am_sz = lengthAMode(pfx,delta);
      d64   = getSDisp8(delta + am_sz);
      delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );

   case 0x81: /* Grp1 Iv,Ev */
      modrm = getUChar(delta);
      /* Same comment as for case 0x80 just above. */
      if (epartIsReg(modrm) && haveF2orF3(pfx))
         goto decode_failure;
      if (!epartIsReg(modrm) && haveF2andF3(pfx))
         goto decode_failure;
      if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx))
         goto decode_failure;
      am_sz = lengthAMode(pfx,delta);
      d64   = getSDisp(d_sz, delta + am_sz);
      delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );

   case 0x83: /* Grp1 Ib,Ev */
      if (haveF2orF3(pfx)) goto decode_failure;
      modrm = getUChar(delta);
      am_sz = lengthAMode(pfx,delta);
      d64   = getSDisp8(delta + am_sz);
      delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );

   case 0x84: /* TEST Eb,Gb */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, False,
                            1, delta, "test" );

   case 0x85: /* TEST Ev,Gv */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, False,
                            sz, delta, "test" );
   /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
      prefix.  Therefore, generate CAS regardless of the presence or
      otherwise of a LOCK prefix. */
   case 0x86: /* XCHG Gb,Eb */
      /* Fall through ... */
   case 0x87: /* XCHG Gv,Ev */
      modrm = getUChar(delta);
      /* Check whether F2 or F3 are allowable.  For the mem case, one
         or the other but not both are.  We don't care about the
         presence of LOCK in this case -- XCHG is unusual in this
         respect. */
      if (haveF2orF3(pfx)) {
         if (epartIsReg(modrm)) {
            goto decode_failure;
         if (haveF2andF3(pfx))
            goto decode_failure;
      t1 = newTemp(ty); t2 = newTemp(ty);
      if (epartIsReg(modrm)) {
         assign(t1, getIRegE(sz, pfx, modrm));
         assign(t2, getIRegG(sz, pfx, modrm));
         putIRegG(sz, pfx, modrm, mkexpr(t1));
         putIRegE(sz, pfx, modrm, mkexpr(t2));
         DIP("xchg%c %s, %s\n",
             nameISize(sz), nameIRegG(sz, pfx, modrm),
             nameIRegE(sz, pfx, modrm));
         *expect_CAS = True;
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign( t1, loadLE(ty, mkexpr(addr)) );
         assign( t2, getIRegG(sz, pfx, modrm) );
         casLE( mkexpr(addr),
                mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
         putIRegG( sz, pfx, modrm, mkexpr(t1) );
         DIP("xchg%c %s, %s\n", nameISize(sz),
             nameIRegG(sz, pfx, modrm), dis_buf);
   case 0x88: { /* MOV Gb,Eb */
      /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
      delta = dis_mov_G_E(vbi, pfx, 1, delta, &ok);
      if (!ok) goto decode_failure;

   case 0x89: { /* MOV Gv,Ev */
      /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
      delta = dis_mov_G_E(vbi, pfx, sz, delta, &ok);
      if (!ok) goto decode_failure;

   case 0x8A: /* MOV Eb,Gb */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_mov_E_G(vbi, pfx, 1, delta);

   case 0x8B: /* MOV Ev,Gv */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_mov_E_G(vbi, pfx, sz, delta);

   case 0x8C: /* MOV S,E -- MOV from a SEGMENT REGISTER */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_mov_S_E(vbi, pfx, sz, delta);

   case 0x8D: /* LEA M,Gv */
      if (haveF2orF3(pfx)) goto decode_failure;
      if (sz != 4 && sz != 8)
         goto decode_failure;
      modrm = getUChar(delta);
      if (epartIsReg(modrm))
         goto decode_failure;
      /* NOTE!  this is the one place where a segment override prefix
         has no effect on the address calculation.  Therefore we clear
         any segment override bits in pfx. */
      addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 );
      /* This is a hack.  But it isn't clear that really doing the
         calculation at 32 bits is really worth it.  Hence for leal,
         do the full 64-bit calculation and then truncate it. */
      putIRegG( sz, pfx, modrm,
                   ? unop(Iop_64to32, mkexpr(addr))
      DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
          nameIRegG(sz,pfx,modrm));
   case 0x8F: { /* POPQ m64 / POPW m16 */
      /* There is no encoding for 32-bit pop in 64-bit mode.
         So sz==4 actually means sz==8. */
      if (haveF2orF3(pfx)) goto decode_failure;
      vassert(sz == 2 || sz == 4
              || /* tolerate redundant REX.W, see #210481 */ sz == 8);
      if (sz == 4) sz = 8;
      if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists

      rm = getUChar(delta);

      /* make sure this instruction is correct POP */
      if (epartIsReg(rm) || gregLO3ofRM(rm) != 0)
         goto decode_failure;
      /* and has correct size */

      t1 = newTemp(Ity_I64);
      t3 = newTemp(Ity_I64);
      assign( t1, getIReg64(R_RSP) );
      assign( t3, loadLE(Ity_I64, mkexpr(t1)) );

      /* Increase RSP; must be done before the STORE.  Intel manual
         says: If the RSP register is used as a base register for
         addressing a destination operand in memory, the POP
         instruction computes the effective address of the operand
         after it increments the RSP register. */
      putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) );

      addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
      storeLE( mkexpr(addr), mkexpr(t3) );

      DIP("popl %s\n", dis_buf);
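      /* Example of why the RSP update must precede the store: for
         "pop 8(%rsp)" the destination address is computed from the
         already-incremented RSP, so doing the putIReg64 first and the
         disAMode/storeLE afterwards matches the manual's rule. */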
   case 0x90: /* XCHG eAX,eAX */
      /* detect and handle F3 90 (rep nop) specially */
      if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) {
         DIP("rep nop (P4 pause)\n");
         /* "observe" the hint.  The Vex client needs to be careful not
            to cause very long delays as a result, though. */
         jmp_lit(dres, Ijk_Yield, guest_RIP_bbstart+delta);
         vassert(dres->whatNext == Dis_StopHere);
      /* detect and handle NOPs specially */
      if (/* F2/F3 probably change meaning completely */
          /* If REX.B is 1, we're not exchanging rAX with itself */
          && getRexB(pfx)==0 ) {
      /* else fall through to normal case. */
   case 0x91: /* XCHG rAX,rCX */
   case 0x92: /* XCHG rAX,rDX */
   case 0x93: /* XCHG rAX,rBX */
   case 0x94: /* XCHG rAX,rSP */
   case 0x95: /* XCHG rAX,rBP */
   case 0x96: /* XCHG rAX,rSI */
   case 0x97: /* XCHG rAX,rDI */
      /* guard against mutancy */
      if (haveF2orF3(pfx)) goto decode_failure;
      codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 );

   case 0x98: /* CBW */
      if (haveF2orF3(pfx)) goto decode_failure;
      putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) );
      DIP(/*"cdqe\n"*/"cltq");
      putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) );
      putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) );
      goto decode_failure;

   case 0x99: /* CWD/CDQ/CQO */
      if (haveF2orF3(pfx)) goto decode_failure;
      vassert(sz == 2 || sz == 4 || sz == 8);
            binop(mkSizedOp(ty,Iop_Sar8),
                  mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) );
      DIP(sz == 2 ? "cwd\n"
                  : (sz == 4 ? /*"cdq\n"*/ "cltd\n"
   case 0x9B: /* FWAIT (X87 insn) */

   case 0x9C: /* PUSHF */ {
      /* Note.  There is no encoding for a 32-bit pushf in 64-bit
         mode.  So sz==4 actually means sz==8. */
      /* 24 July 06: has also been seen with a redundant REX prefix,
         so must also allow sz==8. */
      if (haveF2orF3(pfx)) goto decode_failure;
      vassert(sz == 2 || sz == 4 || sz == 8);
      if (sz == 4) sz = 8;
      if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists

      t1 = newTemp(Ity_I64);
      assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
      putIReg64(R_RSP, mkexpr(t1) );

      t2 = newTemp(Ity_I64);
      assign( t2, mk_amd64g_calculate_rflags_all() );

      /* Patch in the D flag.  This can simply be a copy of bit 10 of
         baseBlock[OFFB_DFLAG]. */
      t3 = newTemp(Ity_I64);
      assign( t3, binop(Iop_Or64,
                        IRExpr_Get(OFFB_DFLAG,Ity_I64),

      /* And patch in the ID flag. */
      t4 = newTemp(Ity_I64);
      assign( t4, binop(Iop_Or64,
                        binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64),

      /* And patch in the AC flag too. */
      t5 = newTemp(Ity_I64);
      assign( t5, binop(Iop_Or64,
                        binop(Iop_Shl64, IRExpr_Get(OFFB_ACFLAG,Ity_I64),

      /* if sz==2, the stored value needs to be narrowed. */
         storeLE( mkexpr(t1), unop(Iop_32to16,
                              unop(Iop_64to32,mkexpr(t5))) );
         storeLE( mkexpr(t1), mkexpr(t5) );

      DIP("pushf%c\n", nameISize(sz));
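      /* So the value finally pushed is, in effect, the lazily-computed
         OSZACP word with the separately-held D, ID and AC guest-state
         fields folded back into their architectural bit positions
         (bits 10, 21 and 18 respectively) before being stored. */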
   case 0x9D: /* POPF */
      /* Note.  There is no encoding for a 32-bit popf in 64-bit mode.
         So sz==4 actually means sz==8. */
      if (haveF2orF3(pfx)) goto decode_failure;
      vassert(sz == 2 || sz == 4);
      if (sz == 4) sz = 8;
      if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
      t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64);
      assign(t2, getIReg64(R_RSP));
      assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2))));
      putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
      /* t1 is the flag word.  Mask out everything except OSZACP and
         set the flags thunk to AMD64G_CC_OP_COPY. */
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1,
                        mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P
                               | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z
                               | AMD64G_CC_MASK_S | AMD64G_CC_MASK_O )

      /* Also need to set the D flag, which is held in bit 10 of t1.
         If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
                  binop(Iop_Shr64, mkexpr(t1), mkU8(10)),
                  mkU64(0xFFFFFFFFFFFFFFFFULL),

      /* And set the ID flag */
                  binop(Iop_Shr64, mkexpr(t1), mkU8(21)),

      /* And set the AC flag too */
                  binop(Iop_Shr64, mkexpr(t1), mkU8(18)),

      DIP("popf%c\n", nameISize(sz));
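      /* Note on the thunk convention used here: AMD64G_CC_OP_COPY means
         CC_DEP1 already holds the architectural OSZACP bits verbatim,
         so a later flag read simply returns the masked word written
         above instead of recomputing flags from an operation's
         operands. */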
   case 0x9E: /* SAHF */

   case 0x9F: /* LAHF */

   case 0xA0: /* MOV Ob,AL */
      if (have66orF2orF3(pfx)) goto decode_failure;
      /* Fall through ... */
   case 0xA1: /* MOV Ov,eAX */
      if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
         goto decode_failure;
      d64 = getDisp64(delta);
      addr = newTemp(Ity_I64);
      assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
      putIRegRAX(sz, loadLE( ty, mkexpr(addr) ));
      DIP("mov%c %s0x%llx, %s\n", nameISize(sz),
          segRegTxt(pfx), (ULong)d64,

   case 0xA2: /* MOV AL,Ob */
      if (have66orF2orF3(pfx)) goto decode_failure;
      /* Fall through ... */
   case 0xA3: /* MOV eAX,Ov */
      if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
         goto decode_failure;
      d64 = getDisp64(delta);
      addr = newTemp(Ity_I64);
      assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
      storeLE( mkexpr(addr), getIRegRAX(sz) );
      DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz),
          segRegTxt(pfx), (ULong)d64);
      /* F3 A4: rep movsb */
      if (haveF3(pfx) && !haveF2(pfx)) {
         dis_REP_op ( dres, AMD64CondAlways, dis_MOVS, sz,
                      guest_RIP_curr_instr,
                      guest_RIP_bbstart+delta, "rep movs", pfx );
         dres->whatNext = Dis_StopHere;
      if (!haveF3(pfx) && !haveF2(pfx)) {
         dis_string_op( dis_MOVS, sz, "movs", pfx );
      goto decode_failure;

      /* F3 A6/A7: repe cmps/rep cmps{w,l,q} */
      if (haveF3(pfx) && !haveF2(pfx)) {
         dis_REP_op ( dres, AMD64CondZ, dis_CMPS, sz,
                      guest_RIP_curr_instr,
                      guest_RIP_bbstart+delta, "repe cmps", pfx );
         dres->whatNext = Dis_StopHere;
      goto decode_failure;

      /* F3 AA/AB: rep stosb/rep stos{w,l,q} */
      if (haveF3(pfx) && !haveF2(pfx)) {
         dis_REP_op ( dres, AMD64CondAlways, dis_STOS, sz,
                      guest_RIP_curr_instr,
                      guest_RIP_bbstart+delta, "rep stos", pfx );
         vassert(dres->whatNext == Dis_StopHere);
      /* AA/AB: stosb/stos{w,l,q} */
      if (!haveF3(pfx) && !haveF2(pfx)) {
         dis_string_op( dis_STOS, sz, "stos", pfx );
      goto decode_failure;

   case 0xA8: /* TEST Ib, AL */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );

   case 0xA9: /* TEST Iv, eAX */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );

   case 0xAC: /* LODS, no REP prefix */
      dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", pfx );

      /* F2 AE/AF: repne scasb/repne scas{w,l,q} */
      if (haveF2(pfx) && !haveF3(pfx)) {
         dis_REP_op ( dres, AMD64CondNZ, dis_SCAS, sz,
                      guest_RIP_curr_instr,
                      guest_RIP_bbstart+delta, "repne scas", pfx );
         vassert(dres->whatNext == Dis_StopHere);
      /* F3 AE/AF: repe scasb/repe scas{w,l,q} */
      if (!haveF2(pfx) && haveF3(pfx)) {
         dis_REP_op ( dres, AMD64CondZ, dis_SCAS, sz,
                      guest_RIP_curr_instr,
                      guest_RIP_bbstart+delta, "repe scas", pfx );
         vassert(dres->whatNext == Dis_StopHere);
      /* AE/AF: scasb/scas{w,l,q} */
      if (!haveF2(pfx) && !haveF3(pfx)) {
         dis_string_op( dis_SCAS, sz, "scas", pfx );
      goto decode_failure;
   /* XXXX be careful here with moves to AH/BH/CH/DH */
   case 0xB0: /* MOV imm,AL */
   case 0xB1: /* MOV imm,CL */
   case 0xB2: /* MOV imm,DL */
   case 0xB3: /* MOV imm,BL */
   case 0xB4: /* MOV imm,AH */
   case 0xB5: /* MOV imm,CH */
   case 0xB6: /* MOV imm,DH */
   case 0xB7: /* MOV imm,BH */
      if (haveF2orF3(pfx)) goto decode_failure;
      d64 = getUChar(delta);
      putIRegRexB(1, pfx, opc-0xB0, mkU8(d64));
      DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0));

   case 0xB8: /* MOV imm,eAX */
   case 0xB9: /* MOV imm,eCX */
   case 0xBA: /* MOV imm,eDX */
   case 0xBB: /* MOV imm,eBX */
   case 0xBC: /* MOV imm,eSP */
   case 0xBD: /* MOV imm,eBP */
   case 0xBE: /* MOV imm,eSI */
   case 0xBF: /* MOV imm,eDI */
      /* This is the one-and-only place where 64-bit literals are
         allowed in the instruction stream. */
      if (haveF2orF3(pfx)) goto decode_failure;
         d64 = getDisp64(delta);
         putIRegRexB(8, pfx, opc-0xB8, mkU64(d64));
         DIP("movabsq $%lld,%s\n", (Long)d64,
             nameIRegRexB(8,pfx,opc-0xB8));
         d64 = getSDisp(imin(4,sz),delta);
         delta += imin(4,sz);
         putIRegRexB(sz, pfx, opc-0xB8,
                     mkU(szToITy(sz), d64 & mkSizeMask(sz)));
         DIP("mov%c $%lld,%s\n", nameISize(sz),
             nameIRegRexB(sz,pfx,opc-0xB8));
   case 0xC0: { /* Grp2 Ib,Eb */
      Bool decode_OK = True;
      if (haveF2orF3(pfx)) goto decode_failure;
      modrm = getUChar(delta);
      am_sz = lengthAMode(pfx,delta);
      d64   = getUChar(delta + am_sz);
      delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d64 & 0xFF), NULL, &decode_OK );
      if (!decode_OK) goto decode_failure;

   case 0xC1: { /* Grp2 Ib,Ev */
      Bool decode_OK = True;
      if (haveF2orF3(pfx)) goto decode_failure;
      modrm = getUChar(delta);
      am_sz = lengthAMode(pfx,delta);
      d64   = getUChar(delta + am_sz);
      delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d64 & 0xFF), NULL, &decode_OK );
      if (!decode_OK) goto decode_failure;

   case 0xC2: /* RET imm16 */
      if (have66orF3(pfx)) goto decode_failure;
      if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
      d64 = getUDisp16(delta);
      dis_ret(dres, vbi, d64);
      DIP("ret $%lld\n", d64);

   case 0xC3: /* RET */
      if (have66(pfx)) goto decode_failure;
      /* F3 is acceptable on AMD. */
      if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
      dis_ret(dres, vbi, 0);
      DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n");

   case 0xC6: /* C6 /0 = MOV Ib,Eb */
      goto maybe_do_Mov_I_E;
   case 0xC7: /* C7 /0 = MOV Iv,Ev */
      goto maybe_do_Mov_I_E;
      modrm = getUChar(delta);
      if (gregLO3ofRM(modrm) == 0) {
         if (epartIsReg(modrm)) {
            /* Neither F2 nor F3 are allowable. */
            if (haveF2orF3(pfx)) goto decode_failure;
            delta++; /* mod/rm byte */
            d64 = getSDisp(imin(4,sz),delta);
            delta += imin(4,sz);
            putIRegE(sz, pfx, modrm,
                     mkU(szToITy(sz), d64 & mkSizeMask(sz)));
            DIP("mov%c $%lld, %s\n", nameISize(sz),
                nameIRegE(sz,pfx,modrm));
            if (haveF2(pfx)) goto decode_failure;
            /* F3(XRELEASE) is allowable here */
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
                              /*xtra*/imin(4,sz) );
            d64 = getSDisp(imin(4,sz),delta);
            delta += imin(4,sz);
            storeLE(mkexpr(addr),
                    mkU(szToITy(sz), d64 & mkSizeMask(sz)));
            DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf);

      /* BEGIN HACKY SUPPORT FOR xbegin */
      if (opc == 0xC7 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 4
          && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
         delta++; /* mod/rm byte */
         d64 = getSDisp(4,delta);
         guest_RIP_next_mustcheck = True;
         guest_RIP_next_assumed = guest_RIP_bbstart + delta;
         Addr64 failAddr = guest_RIP_bbstart + delta + d64;
         /* EAX contains the failure status code.  Bit 3 is "Set if an
            internal buffer overflowed", which seems like the
            least-bogus choice we can make here. */
         putIRegRAX(4, mkU32(1<<3));
         /* And jump to the fail address. */
         jmp_lit(dres, Ijk_Boring, failAddr);
         vassert(dres->whatNext == Dis_StopHere);
         DIP("xbeginq 0x%llx\n", failAddr);
      /* END HACKY SUPPORT FOR xbegin */
      /* BEGIN HACKY SUPPORT FOR xabort */
      if (opc == 0xC6 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 1
          && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
         delta++; /* mod/rm byte */
         abyte = getUChar(delta); delta++;
         /* There is never a real transaction in progress, so do nothing. */
         DIP("xabort $%d", (Int)abyte);
      /* END HACKY SUPPORT FOR xabort */
      goto decode_failure;
   case 0xC8: /* ENTER */
      /* Same comments re operand size as for LEAVE below apply.
         Also, only handles the case "enter $imm16, $0"; other cases
         for the second operand (nesting depth) are not handled. */
         goto decode_failure;
      d64 = getUDisp16(delta);
      vassert(d64 >= 0 && d64 <= 0xFFFF);
      if (getUChar(delta) != 0)
         goto decode_failure;

      /* Intel docs seem to suggest:
      t1 = newTemp(Ity_I64);
      assign(t1, getIReg64(R_RBP));
      t2 = newTemp(Ity_I64);
      assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
      putIReg64(R_RSP, mkexpr(t2));
      storeLE(mkexpr(t2), mkexpr(t1));
      putIReg64(R_RBP, mkexpr(t2));
      putIReg64(R_RSP, binop(Iop_Sub64, mkexpr(t2), mkU64(d64)));
      DIP("enter $%u, $0\n", (UInt)d64);

   case 0xC9: /* LEAVE */
      /* In 64-bit mode this defaults to a 64-bit operand size.  There
         is no way to encode a 32-bit variant.  Hence sz==4 but we do
         goto decode_failure;
      t1 = newTemp(Ity_I64);
      t2 = newTemp(Ity_I64);
      assign(t1, getIReg64(R_RBP));
      /* First PUT RSP looks redundant, but need it because RSP must
         always be up-to-date for Memcheck to work... */
      putIReg64(R_RSP, mkexpr(t1));
      assign(t2, loadLE(Ity_I64,mkexpr(t1)));
      putIReg64(R_RBP, mkexpr(t2));
      putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) );
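      /* In other words LEAVE behaves like "movq %rbp,%rsp ; popq %rbp":
         RSP is first pointed at the saved frame pointer, the old RBP is
         reloaded from there, and RSP finally ends up 8 bytes above it. */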
   case 0xCC: /* INT 3 */
      jmp_lit(dres, Ijk_SigTRAP, guest_RIP_bbstart + delta);
      vassert(dres->whatNext == Dis_StopHere);

   case 0xCD: /* INT imm8 */
      d64 = getUChar(delta); delta++;

      /* Handle int $0xD2 (Solaris fasttrap syscalls). */
         jmp_lit(dres, Ijk_Sys_int210, guest_RIP_bbstart + delta);
         vassert(dres->whatNext == Dis_StopHere);
         DIP("int $0xD2\n");
      goto decode_failure;

   case 0xD0: { /* Grp2 1,Eb */
      Bool decode_OK = True;
      if (haveF2orF3(pfx)) goto decode_failure;
      modrm = getUChar(delta);
      am_sz = lengthAMode(pfx,delta);
      delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d64), NULL, &decode_OK );
      if (!decode_OK) goto decode_failure;

   case 0xD1: { /* Grp2 1,Ev */
      Bool decode_OK = True;
      if (haveF2orF3(pfx)) goto decode_failure;
      modrm = getUChar(delta);
      am_sz = lengthAMode(pfx,delta);
      delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
                         mkU8(d64), NULL, &decode_OK );
      if (!decode_OK) goto decode_failure;

   case 0xD2: { /* Grp2 CL,Eb */
      Bool decode_OK = True;
      if (haveF2orF3(pfx)) goto decode_failure;
      modrm = getUChar(delta);
      am_sz = lengthAMode(pfx,delta);
      delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
                         getIRegCL(), "%cl", &decode_OK );
      if (!decode_OK) goto decode_failure;

   case 0xD3: { /* Grp2 CL,Ev */
      Bool decode_OK = True;
      if (haveF2orF3(pfx)) goto decode_failure;
      modrm = getUChar(delta);
      am_sz = lengthAMode(pfx,delta);
      delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
                         getIRegCL(), "%cl", &decode_OK );
      if (!decode_OK) goto decode_failure;
   case 0xD8: /* X87 instructions */
      Bool redundantREXWok = False;

      if (haveF2orF3(pfx))
         goto decode_failure;

      /* kludge to tolerate redundant rex.w prefixes (should do this
         properly one day) */
      /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */
      if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ )
         redundantREXWok = True;

      Bool size_OK = False;
      else if ( sz == 8 )
         size_OK = redundantREXWok;
      else if ( sz == 2 ) {
         int mod_rm = getUChar(delta+0);
         int reg = gregLO3ofRM(mod_rm);
         /* The HotSpot JVM uses these */
         if ( (opc == 0xDD) && (reg == 0 /* FLDL   */ ||
                                reg == 4 /* FNSAVE */ ||
                                reg == 6 /* FRSTOR */ ) )
      /* AMD manual says 0x66 size override is ignored, except where
         it is meaningful */
         goto decode_failure;

      Bool decode_OK = False;
      delta = dis_FPU ( &decode_OK, vbi, pfx, delta );
         goto decode_failure;
   case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
   case 0xE1: /* LOOPE  disp8: decrement count, jump if count != 0 && ZF==1 */
   case 0xE2: /* LOOP   disp8: decrement count, jump if count != 0 */
    { /* The docs say this uses rCX as a count depending on the
         address size override, not the operand one. */
      IRExpr* zbit  = NULL;
      IRExpr* count = NULL;
      IRExpr* cond  = NULL;
      const HChar* xtra = NULL;

      if (have66orF2orF3(pfx) || 1==getRexW(pfx)) goto decode_failure;
      /* So at this point we've rejected any variants which appear to
         be governed by the usual operand-size modifiers.  Hence only
         the address size prefix can have an effect.  It changes the
         size from 64 (default) to 32. */
      d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta);
      if (haveASO(pfx)) {
         /* 64to32 of 64-bit get is merely a get-put improvement
         putIReg32(R_RCX, binop(Iop_Sub32,
                                unop(Iop_64to32, getIReg64(R_RCX)),
         putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1)));

      /* This is correct, both for 32- and 64-bit versions.  If we're
         doing a 32-bit dec and the result is zero then the default
         zero extension rule will cause the upper 32 bits to be zero
         too.  Hence a 64-bit check against zero is OK. */
      count = getIReg64(R_RCX);
      cond = binop(Iop_CmpNE64, count, mkU64(0));
            zbit = mk_amd64g_calculate_condition( AMD64CondZ );
            cond = mkAnd1(cond, zbit);
            zbit = mk_amd64g_calculate_condition( AMD64CondNZ );
            cond = mkAnd1(cond, zbit);
      stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64), OFFB_RIP) );

      DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? "l" : "", (ULong)d64);

      /* JRCXZ or JECXZ, depending address size override. */
      if (have66orF2orF3(pfx)) goto decode_failure;
      d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
      if (haveASO(pfx)) {
         stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
                                  unop(Iop_32Uto64, getIReg32(R_RCX)),
         DIP("jecxz 0x%llx\n", (ULong)d64);
         stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
         DIP("jrcxz 0x%llx\n", (ULong)d64);
   case 0xE4: /* IN imm8, AL */
      t1 = newTemp(Ity_I64);
      abyte = getUChar(delta); delta++;
      assign(t1, mkU64( abyte & 0xFF ));
      DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
   case 0xE5: /* IN imm8, eAX */
      if (!(sz == 2 || sz == 4)) goto decode_failure;
      t1 = newTemp(Ity_I64);
      abyte = getUChar(delta); delta++;
      assign(t1, mkU64( abyte & 0xFF ));
      DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
   case 0xEC: /* IN %DX, AL */
      t1 = newTemp(Ity_I64);
      assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
      DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
   case 0xED: /* IN %DX, eAX */
      if (!(sz == 2 || sz == 4)) goto decode_failure;
      t1 = newTemp(Ity_I64);
      assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
      DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),

      /* At this point, sz indicates the width, and t1 is a 64-bit
         value giving port number. */
      if (haveF2orF3(pfx)) goto decode_failure;
      vassert(sz == 1 || sz == 2 || sz == 4);
      t2 = newTemp(Ity_I64);
      d = unsafeIRDirty_1_N(
             "amd64g_dirtyhelper_IN",
             &amd64g_dirtyhelper_IN,
             mkIRExprVec_2( mkexpr(t1), mkU64(sz) )
      /* do the call, dumping the result in t2. */
      stmt( IRStmt_Dirty(d) );
      putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) );
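      /* This is the usual pattern for modelling an instruction whose
         effect cannot be described in pure IR: build an IRDirty with
         unsafeIRDirty_1_N naming the C helper and its argument vector,
         emit it with stmt(IRStmt_Dirty(d)), and then narrow the 64-bit
         helper result down to the operand size before writing RAX. */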
   case 0xE6: /* OUT AL, imm8 */
      t1 = newTemp(Ity_I64);
      abyte = getUChar(delta); delta++;
      assign( t1, mkU64( abyte & 0xFF ) );
      DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
   case 0xE7: /* OUT eAX, imm8 */
      if (!(sz == 2 || sz == 4)) goto decode_failure;
      t1 = newTemp(Ity_I64);
      abyte = getUChar(delta); delta++;
      assign( t1, mkU64( abyte & 0xFF ) );
      DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
   case 0xEE: /* OUT AL, %DX */
      t1 = newTemp(Ity_I64);
      assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
      DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
   case 0xEF: /* OUT eAX, %DX */
      if (!(sz == 2 || sz == 4)) goto decode_failure;
      t1 = newTemp(Ity_I64);
      assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
      DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),

      /* At this point, sz indicates the width, and t1 is a 64-bit
         value giving port number. */
      if (haveF2orF3(pfx)) goto decode_failure;
      vassert(sz == 1 || sz == 2 || sz == 4);
      d = unsafeIRDirty_0_N(
             "amd64g_dirtyhelper_OUT",
             &amd64g_dirtyhelper_OUT,
             mkIRExprVec_3( mkexpr(t1),
                            widenUto64( getIRegRAX(sz) ),
      stmt( IRStmt_Dirty(d) );
   case 0xE8: /* CALL J4 */
      if (haveF3(pfx)) goto decode_failure;
      if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
      d64 = getSDisp32(delta); delta += 4;
      d64 += (guest_RIP_bbstart+delta);
      /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */
      t1 = newTemp(Ity_I64);
      assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
      putIReg64(R_RSP, mkexpr(t1));
      storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta));
      t2 = newTemp(Ity_I64);
      assign(t2, mkU64((Addr64)d64));
      make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32");
      if (resteerOkFn( callback_opaque, (Addr64)d64) ) {
         /* follow into the call target. */
         dres->whatNext   = Dis_ResteerU;
         dres->continueAt = d64;
         jmp_lit(dres, Ijk_Call, d64);
         vassert(dres->whatNext == Dis_StopHere);
      DIP("call 0x%llx\n", (ULong)d64);

   case 0xE9: /* Jv (jump, 16/32 offset) */
      if (haveF3(pfx)) goto decode_failure;
         goto decode_failure; /* JRS added 2004 July 11 */
      if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
      d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta);
      if (resteerOkFn(callback_opaque, (Addr64)d64)) {
         dres->whatNext   = Dis_ResteerU;
         dres->continueAt = d64;
         jmp_lit(dres, Ijk_Boring, d64);
         vassert(dres->whatNext == Dis_StopHere);
      DIP("jmp 0x%llx\n", (ULong)d64);

   case 0xEB: /* Jb (jump, byte offset) */
      if (haveF3(pfx)) goto decode_failure;
         goto decode_failure; /* JRS added 2004 July 11 */
      if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
      d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
      if (resteerOkFn(callback_opaque, (Addr64)d64)) {
         dres->whatNext   = Dis_ResteerU;
         dres->continueAt = d64;
         jmp_lit(dres, Ijk_Boring, d64);
         vassert(dres->whatNext == Dis_StopHere);
      DIP("jmp-8 0x%llx\n", (ULong)d64);
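      /* For CALL/JMP with a literal target the translator may
         "resteer": if resteerOkFn approves the destination, whatNext is
         set to Dis_ResteerU and disassembly simply continues at d64 in
         the same superblock; otherwise the block ends with jmp_lit. */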
   case 0xF5: /* CMC */
   case 0xF8: /* CLC */
   case 0xF9: /* STC */
      t1 = newTemp(Ity_I64);
      t2 = newTemp(Ity_I64);
      assign( t1, mk_amd64g_calculate_rflags_all() );
         assign( t2, binop(Iop_Xor64, mkexpr(t1),
                           mkU64(AMD64G_CC_MASK_C)));
         assign( t2, binop(Iop_And64, mkexpr(t1),
                           mkU64(~AMD64G_CC_MASK_C)));
         assign( t2, binop(Iop_Or64, mkexpr(t1),
                           mkU64(AMD64G_CC_MASK_C)));
         vpanic("disInstr(x64)(cmc/clc/stc)");
      stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t2) ));
      /* Set NDEP even though it isn't used.  This makes redundant-PUT
         elimination of previous stores to this field work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   case 0xF6: { /* Grp3 Eb */
      Bool decode_OK = True;
      /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
      /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
      delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK );
      if (!decode_OK) goto decode_failure;

   case 0xF7: { /* Grp3 Ev */
      Bool decode_OK = True;
      /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
      /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
      delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK );
      if (!decode_OK) goto decode_failure;

   case 0xFC: /* CLD */
      if (haveF2orF3(pfx)) goto decode_failure;
      stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) );

   case 0xFD: /* STD */
      if (haveF2orF3(pfx)) goto decode_failure;
      stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) );

   case 0xFE: { /* Grp4 Eb */
      Bool decode_OK = True;
      /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
      /* We now let dis_Grp4 itself decide if F2 and/or F3 are valid */
      delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK );
      if (!decode_OK) goto decode_failure;

   case 0xFF: { /* Grp5 Ev */
      Bool decode_OK = True;
      /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
      /* We now let dis_Grp5 itself decide if F2 and/or F3 are valid */
      delta = dis_Grp5 ( vbi, pfx, sz, delta, dres, &decode_OK );
      if (!decode_OK) goto decode_failure;

   return deltaIN; /* fail */
/*------------------------------------------------------------*/
/*--- Top-level post-escape decoders: dis_ESC_0F           ---*/
/*------------------------------------------------------------*/

static IRTemp math_BSWAP ( IRTemp t1, IRType ty )
   IRTemp t2 = newTemp(ty);
   if (ty == Ity_I64) {
      IRTemp m8  = newTemp(Ity_I64);
      IRTemp s8  = newTemp(Ity_I64);
      IRTemp m16 = newTemp(Ity_I64);
      IRTemp s16 = newTemp(Ity_I64);
      IRTemp m32 = newTemp(Ity_I64);
      assign( m8, mkU64(0xFF00FF00FF00FF00ULL) );
                     binop(Iop_And64,mkexpr(t1),mkexpr(m8)),
                     binop(Iop_Shl64,mkexpr(t1),mkU8(8)),
      assign( m16, mkU64(0xFFFF0000FFFF0000ULL) );
                     binop(Iop_And64,mkexpr(s8),mkexpr(m16)),
                     binop(Iop_Shl64,mkexpr(s8),mkU8(16)),
      assign( m32, mkU64(0xFFFFFFFF00000000ULL) );
                     binop(Iop_And64,mkexpr(s16),mkexpr(m32)),
                     binop(Iop_Shl64,mkexpr(s16),mkU8(32)),
   if (ty == Ity_I32) {
                  binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
                  binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
                                   mkU32(0x00FF0000)),
                  binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
                                   mkU32(0x0000FF00)),
                  binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
                                   mkU32(0x000000FF) )
   if (ty == Ity_I16) {
                  binop(Iop_Shl16, mkexpr(t1), mkU8(8)),
                  binop(Iop_Shr16, mkexpr(t1), mkU8(8)) ));
   return IRTemp_INVALID;
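/* The 64-bit path above swaps bytes with three mask-and-shift rounds
   (8, then 16, then 32 bits), each exchanging adjacent lanes.  As an
   illustration, 0x0102030405060708 becomes 0x0201040306050807 after
   the 8-bit round, 0x0403020108070605 after the 16-bit round, and
   finally 0x0807060504030201. */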
__attribute__((noinline))
   /*MB_OUT*/DisResult* dres,
   /*MB_OUT*/Bool*      expect_CAS,
   Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
   void*        callback_opaque,
   const VexArchInfo* archinfo,
   const VexAbiInfo*  vbi,
   Prefix pfx, Int sz, Long deltaIN

   IRTemp addr = IRTemp_INVALID;
   IRTemp t1   = IRTemp_INVALID;
   IRTemp t2   = IRTemp_INVALID;

   /* In the first switch, look for ordinary integer insns. */
   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);

   switch (opc) { /* first switch */
      modrm = getUChar(delta);

      /* 0F 01 /0 -- SGDT */
      /* 0F 01 /1 -- SIDT */
      if (!epartIsReg(modrm)
          && (gregLO3ofRM(modrm) == 0 || gregLO3ofRM(modrm) == 1)) {
         /* This is really revolting, but ... since each processor
            (core) only has one IDT and one GDT, just let the guest
            see it (pass-through semantics).  I can't see any way to
            construct a faked-up value, so don't bother to try. */
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         switch (gregLO3ofRM(modrm)) {
            case 0: DIP("sgdt %s\n", dis_buf); break;
            case 1: DIP("sidt %s\n", dis_buf); break;
            default: vassert(0); /*NOTREACHED*/
         IRDirty* d = unsafeIRDirty_0_N (
                         "amd64g_dirtyhelper_SxDT",
                         &amd64g_dirtyhelper_SxDT,
                         mkIRExprVec_2( mkexpr(addr),
                                        mkU64(gregLO3ofRM(modrm)) )
         /* declare we're writing memory */
         d->mFx   = Ifx_Write;
         d->mAddr = mkexpr(addr);
         stmt( IRStmt_Dirty(d) );

      /* 0F 01 D0 = XGETBV */
      if (modrm == 0xD0 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
         /* Fault (SEGV) if ECX isn't zero.  Intel docs say #GP and I
            am not sure if that translates in to SEGV or to something
            else, in user space. */
         t1 = newTemp(Ity_I32);
         assign( t1, getIReg32(R_RCX) );
         stmt( IRStmt_Exit(binop(Iop_CmpNE32, mkexpr(t1), mkU32(0)),
                           IRConst_U64(guest_RIP_curr_instr),
         putIRegRAX(4, mkU32(7));
         putIRegRDX(4, mkU32(0));

      /* BEGIN HACKY SUPPORT FOR xend */
      /* 0F 01 D5 = XEND */
      if (modrm == 0xD5 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
         /* We are never in a transaction (xbegin immediately aborts).
            So this just always generates a General Protection Fault. */
         jmp_lit(dres, Ijk_SigSEGV, guest_RIP_bbstart + delta);
         vassert(dres->whatNext == Dis_StopHere);
      /* END HACKY SUPPORT FOR xend */

      /* BEGIN HACKY SUPPORT FOR xtest */
      /* 0F 01 D6 = XTEST */
      if (modrm == 0xD6 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
         /* Sets ZF because there never is a transaction, and all
            CF, OF, SF, PF and AF are always cleared by xtest. */
         stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
         stmt( IRStmt_Put( OFFB_CC_DEP1, mkU64(AMD64G_CC_MASK_Z) ));
         /* Set NDEP even though it isn't used.  This makes redundant-PUT
            elimination of previous stores to this field work better. */
         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
      /* END HACKY SUPPORT FOR xtest */

      /* 0F 01 F9 = RDTSCP */
      if (modrm == 0xF9 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDTSCP)) {
         /* Uses dirty helper:
               void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* )
            declared to wr rax, rcx, rdx
         const HChar* fName = "amd64g_dirtyhelper_RDTSCP";
         void*        fAddr = &amd64g_dirtyhelper_RDTSCP;
            = unsafeIRDirty_0_N ( 0/*regparms*/,
                                  fName, fAddr, mkIRExprVec_1(IRExpr_GSPTR()) );
         /* declare guest state effects */
         vex_bzero(&d->fxState, sizeof(d->fxState));
         d->fxState[0].fx     = Ifx_Write;
         d->fxState[0].offset = OFFB_RAX;
         d->fxState[0].size   = 8;
         d->fxState[1].fx     = Ifx_Write;
         d->fxState[1].offset = OFFB_RCX;
         d->fxState[1].size   = 8;
         d->fxState[2].fx     = Ifx_Write;
         d->fxState[2].offset = OFFB_RDX;
         d->fxState[2].size   = 8;
         /* execute the dirty call, side-effecting guest state */
         stmt( IRStmt_Dirty(d) );
         /* RDTSCP is a serialising insn.  So, just in case someone is
            using it as a memory fence ... */
         stmt( IRStmt_MBE(Imbe_Fence) );

      /* else decode failed */
   case 0x05: /* SYSCALL */
      guest_RIP_next_mustcheck = True;
      guest_RIP_next_assumed = guest_RIP_bbstart + delta;
      putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) );
      /* It's important that all guest state is up-to-date
         at this point.  So we declare an end-of-block here, which
         forces any cached guest state to be flushed. */
      jmp_lit(dres, Ijk_Sys_syscall, guest_RIP_next_assumed);
      vassert(dres->whatNext == Dis_StopHere);

   case 0x0B: /* UD2 */
      stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
      jmp_lit(dres, Ijk_NoDecode, guest_RIP_curr_instr);
      vassert(dres->whatNext == Dis_StopHere);

   case 0x0D: /* 0F 0D /0 -- prefetch mem8 */
              /* 0F 0D /1 -- prefetchw mem8 */
      if (have66orF2orF3(pfx)) goto decode_failure;
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) goto decode_failure;
      if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1)
         goto decode_failure;
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      switch (gregLO3ofRM(modrm)) {
         case 0: DIP("prefetch %s\n", dis_buf); break;
         case 1: DIP("prefetchw %s\n", dis_buf); break;
         default: vassert(0); /*NOTREACHED*/

      // Intel CET instructions can have any prefixes before NOPs
      // and can use any ModRM, SIB and disp
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         DIP("nop%c\n", nameISize(sz));
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         DIP("nop%c %s\n", nameISize(sz), dis_buf);

   case 0x31: { /* RDTSC */
      IRTemp   val  = newTemp(Ity_I64);
      IRExpr** args = mkIRExprVec_0();
      IRDirty* d    = unsafeIRDirty_1_N (
                         "amd64g_dirtyhelper_RDTSC",
                         &amd64g_dirtyhelper_RDTSC,
      if (have66orF2orF3(pfx)) goto decode_failure;
      /* execute the dirty call, dumping the result in val. */
      stmt( IRStmt_Dirty(d) );
      putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val)));
      putIRegRAX(4, unop(Iop_64to32, mkexpr(val)));
   case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
   case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
   case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
   case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
   case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
   case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
   case 0x48: /* CMOVSb (cmov negative) */
   case 0x49: /* CMOVSb (cmov not negative) */
   case 0x4A: /* CMOVP (cmov parity even) */
   case 0x4B: /* CMOVNP (cmov parity odd) */
   case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
   case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
   case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
   case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta);

   case 0x82: /* JBb/JNAEb (jump below) */
   case 0x83: /* JNBb/JAEb (jump not below) */
   case 0x84: /* JZb/JEb (jump zero) */
   case 0x85: /* JNZb/JNEb (jump not zero) */
   case 0x86: /* JBEb/JNAb (jump below or equal) */
   case 0x87: /* JNBEb/JAb (jump not below or equal) */
   case 0x88: /* JSb (jump negative) */
   case 0x89: /* JSb (jump not negative) */
   case 0x8A: /* JP (jump parity even) */
   case 0x8B: /* JNP/JPO (jump parity odd) */
   case 0x8C: /* JLb/JNGEb (jump less) */
   case 0x8D: /* JGEb/JNLb (jump greater or equal) */
   case 0x8E: /* JLEb/JNGb (jump less or equal) */
   case 0x8F: { /* JGb/JNLEb (jump greater) */
      const HChar* comment = "";
      if (haveF3(pfx)) goto decode_failure;
      if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
      jmpDelta = getSDisp32(delta);
      d64 = (guest_RIP_bbstart+delta+4) + jmpDelta;
          && vex_control.guest_chase_cond
          && (Addr64)d64 != (Addr64)guest_RIP_bbstart
          && resteerOkFn( callback_opaque, (Addr64)d64) ) {
         /* Speculation: assume this backward branch is taken.  So
            we need to emit a side-exit to the insn following this
            one, on the negation of the condition, and continue at
            the branch target address (d64).  If we wind up back at
            the first instruction of the trace, just stop; it's
            better to let the IR loop unroller handle that case. */
               mk_amd64g_calculate_condition(
                  (AMD64Condcode)(1 ^ (opc - 0x80))),
               IRConst_U64(guest_RIP_bbstart+delta),
         dres->whatNext   = Dis_ResteerC;
         dres->continueAt = d64;
         comment = "(assumed taken)";
          && vex_control.guest_chase_cond
          && (Addr64)d64 != (Addr64)guest_RIP_bbstart
          && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta) ) {
         /* Speculation: assume this forward branch is not taken.
            So we need to emit a side-exit to d64 (the dest) and
            continue disassembling at the insn immediately
            following this one. */
               mk_amd64g_calculate_condition((AMD64Condcode)
         dres->whatNext   = Dis_ResteerC;
         dres->continueAt = guest_RIP_bbstart+delta;
         comment = "(assumed not taken)";
         /* Conservative default translation - end the block at
         jcc_01( dres, (AMD64Condcode)(opc - 0x80),
                 guest_RIP_bbstart+delta, d64 );
         vassert(dres->whatNext == Dis_StopHere);
      DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), (ULong)d64,
   case 0x92: /* set-Bb/set-NAEb (set if below) */
   case 0x93: /* set-NBb/set-AEb (set if not below) */
   case 0x94: /* set-Zb/set-Eb (set if zero) */
   case 0x95: /* set-NZb/set-NEb (set if not zero) */
   case 0x96: /* set-BEb/set-NAb (set if below or equal) */
   case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
   case 0x98: /* set-Sb (set if negative) */
   case 0x99: /* set-Sb (set if not negative) */
   case 0x9A: /* set-P (set if parity even) */
   case 0x9B: /* set-NP (set if parity odd) */
   case 0x9C: /* set-Lb/set-NGEb (set if less) */
   case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
   case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
   case 0x9F: /* set-Gb/set-NLEb (set if greater) */
      if (haveF2orF3(pfx)) goto decode_failure;
      t1 = newTemp(Ity_I8);
      assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) );
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) {
         putIRegE(1, pfx, modrm, mkexpr(t1));
         DIP("set%s %s\n", name_AMD64Condcode(opc-0x90),
             nameIRegE(1,pfx,modrm));
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         storeLE( mkexpr(addr), mkexpr(t1) );
         DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf);
   case 0x1B: { /* Future MPX instructions, currently NOPs.
                   BNDMK b, m     F3 0F 1B
                   BNDCL b, r/m   F3 0F 1A
                   BNDCU b, r/m   F2 0F 1A
                   BNDCN b, r/m   F2 0F 1B
                   BNDMOV b, b/m  66 0F 1A
                   BNDMOV b/m, b  66 0F 1B
                   BNDLDX b, mib     0F 1A
                   BNDSTX mib, b     0F 1B */

      /* All instructions have two operands.  One operand is always the
         bnd register number (bnd0-bnd3, other register numbers are
         ignored when MPX isn't enabled, but should generate an
         exception if MPX is enabled) given by gregOfRexRM.  The other
         operand is either a ModRM:reg, ModRM:r/m or a SIB encoded
         address, all of which can be decoded by using either
         eregOfRexRM or disAMode. */

      modrm = getUChar(delta);
      int bnd = gregOfRexRM(pfx,modrm);
      if (epartIsReg(modrm)) {
         oper = nameIReg64 (eregOfRexRM(pfx,modrm));
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );

      if (haveF3no66noF2 (pfx)) {
            DIP ("bndmk %s, %%bnd%d\n", oper, bnd);
         } else /* opc == 0x1A */ {
            DIP ("bndcl %s, %%bnd%d\n", oper, bnd);
      } else if (haveF2no66noF3 (pfx)) {
            DIP ("bndcu %s, %%bnd%d\n", oper, bnd);
         } else /* opc == 0x1B */ {
            DIP ("bndcn %s, %%bnd%d\n", oper, bnd);
      } else if (have66noF2noF3 (pfx)) {
            DIP ("bndmov %s, %%bnd%d\n", oper, bnd);
         } else /* opc == 0x1B */ {
            DIP ("bndmov %%bnd%d, %s\n", bnd, oper);
      } else if (haveNo66noF2noF3 (pfx)) {
            DIP ("bndldx %s, %%bnd%d\n", oper, bnd);
         } else /* opc == 0x1B */ {
            DIP ("bndstx %%bnd%d, %s\n", bnd, oper);
      } else goto decode_failure;
   case 0xA2: { /* CPUID */
      /* Uses dirty helper:
            void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* )
         declared to mod rax, wr rbx, rcx, rdx
      const HChar* fName = NULL;
      void*        fAddr = NULL;

      if (haveF2orF3(pfx)) goto decode_failure;

      /* This isn't entirely correct, CPUID should depend on the VEX
         capabilities, not on the underlying CPU. See bug #324882. */
      if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) &&
          (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) &&
          (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX2)) {
         fName = "amd64g_dirtyhelper_CPUID_avx2";
         fAddr = &amd64g_dirtyhelper_CPUID_avx2;
         /* This is a Core-i7-4910-like machine */
      else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) &&
               (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) &&
               (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
         fName = "amd64g_dirtyhelper_CPUID_avx_and_cx16";
         fAddr = &amd64g_dirtyhelper_CPUID_avx_and_cx16;
         /* This is a Core-i5-2300-like machine */
      else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) &&
               (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16)) {
         fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16";
         fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16;
         /* This is a Core-i5-670-like machine */
         /* Give a CPUID for at least a baseline machine, SSE2
            only, and no CX16 */
         fName = "amd64g_dirtyhelper_CPUID_baseline";
         fAddr = &amd64g_dirtyhelper_CPUID_baseline;

      vassert(fName); vassert(fAddr);
      d = unsafeIRDirty_0_N ( 0/*regparms*/,
                              fName, fAddr, mkIRExprVec_1(IRExpr_GSPTR()) );
      /* declare guest state effects */
      vex_bzero(&d->fxState, sizeof(d->fxState));
      d->fxState[0].fx     = Ifx_Modify;
      d->fxState[0].offset = OFFB_RAX;
      d->fxState[0].size   = 8;
      d->fxState[1].fx     = Ifx_Write;
      d->fxState[1].offset = OFFB_RBX;
      d->fxState[1].size   = 8;
      d->fxState[2].fx     = Ifx_Modify;
      d->fxState[2].offset = OFFB_RCX;
      d->fxState[2].size   = 8;
      d->fxState[3].fx     = Ifx_Write;
      d->fxState[3].offset = OFFB_RDX;
      d->fxState[3].size   = 8;
      /* execute the dirty call, side-effecting guest state */
      stmt( IRStmt_Dirty(d) );
      /* CPUID is a serialising insn.  So, just in case someone is
         using it as a memory fence ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
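      /* Which CPUID personality the guest sees is thus picked from
         archinfo->hwcaps (AVX2, then AVX, then SSE4.2/CX16, then a
         plain baseline, in that order of preference); each helper
         presents a fixed, self-consistent processor model rather than
         forwarding the host's real CPUID. */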
   case 0xA3: { /* BT Gv,Ev */
      /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
      if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
      delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone, &ok );
      if (!ok) goto decode_failure;

   case 0xA4: /* SHLDv imm8,Gv,Ev */
      modrm = getUChar(delta);
      d64   = delta + lengthAMode(pfx, delta);
      vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
      delta = dis_SHLRD_Gv_Ev (
                 vbi, pfx, delta, modrm, sz,
                 mkU8(getUChar(d64)), True, /* literal */
                 dis_buf, True /* left */ );

   case 0xA5: /* SHLDv %cl,Gv,Ev */
      modrm = getUChar(delta);
      delta = dis_SHLRD_Gv_Ev (
                 vbi, pfx, delta, modrm, sz,
                 getIRegCL(), False, /* not literal */
                 "%cl", True /* left */ );

   case 0xAB: { /* BTS Gv,Ev */
      /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
      if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
      delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet, &ok );
      if (!ok) goto decode_failure;

   case 0xAC: /* SHRDv imm8,Gv,Ev */
      modrm = getUChar(delta);
      d64   = delta + lengthAMode(pfx, delta);
      vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
      delta = dis_SHLRD_Gv_Ev (
                 vbi, pfx, delta, modrm, sz,
                 mkU8(getUChar(d64)), True, /* literal */
                 dis_buf, False /* right */ );

   case 0xAD: /* SHRDv %cl,Gv,Ev */
      modrm = getUChar(delta);
      delta = dis_SHLRD_Gv_Ev (
                 vbi, pfx, delta, modrm, sz,
                 getIRegCL(), False, /* not literal */
                 "%cl", False /* right */);

   case 0xAF: /* IMUL Ev, Gv */
      if (haveF2orF3(pfx)) goto decode_failure;
      delta = dis_mul_E_G ( vbi, pfx, sz, delta );
   case 0xB0: { /* CMPXCHG Gb,Eb */
      /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
      delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta );
      if (!ok) goto decode_failure;

   case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */
      /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
      if (sz != 2 && sz != 4 && sz != 8) goto decode_failure;
      delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta );
      if (!ok) goto decode_failure;

   case 0xB3: { /* BTR Gv,Ev */
      /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
      if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
      delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset, &ok );
      if (!ok) goto decode_failure;

   case 0xB6: /* MOVZXb Eb,Gv */
      if (haveF2orF3(pfx)) goto decode_failure;
      if (sz != 2 && sz != 4 && sz != 8)
         goto decode_failure;
      delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False );

   case 0xB7: /* MOVZXw Ew,Gv */
      if (haveF2orF3(pfx)) goto decode_failure;
      if (sz != 4 && sz != 8)
         goto decode_failure;
      delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False );

   case 0xBA: { /* Grp8 Ib,Ev */
      /* We let dis_Grp8_Imm decide whether F2 or F3 are allowable. */
      Bool decode_OK = False;
      modrm = getUChar(delta);
      am_sz = lengthAMode(pfx,delta);
      d64   = getSDisp8(delta + am_sz);
      delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64,
         goto decode_failure;

   case 0xBB: { /* BTC Gv,Ev */
      /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
      if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
      delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp, &ok );
      if (!ok) goto decode_failure;

   case 0xBC: /* BSF Gv,Ev */
      if (!haveF2orF3(pfx)
          || (haveF3noF2(pfx)
              && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI))) {
         /* no-F2 no-F3 0F BC = BSF
            or F3 0F BC = REP; BSF on older CPUs. */
         delta = dis_bs_E_G ( vbi, pfx, sz, delta, True );
      /* Fall through, since F3 0F BC is TZCNT, and needs to
         be handled by dis_ESC_0F__SSE4. */

   case 0xBD: /* BSR Gv,Ev */
      if (!haveF2orF3(pfx)
          || (haveF3noF2(pfx)
              && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT))) {
         /* no-F2 no-F3 0F BD = BSR
            or F3 0F BD = REP; BSR on older CPUs. */
         delta = dis_bs_E_G ( vbi, pfx, sz, delta, False );
      /* Fall through, since F3 0F BD is LZCNT, and needs to
         be handled by dis_ESC_0F__SSE4. */

   case 0xBE: /* MOVSXb Eb,Gv */
      if (haveF2orF3(pfx)) goto decode_failure;
      if (sz != 2 && sz != 4 && sz != 8)
         goto decode_failure;
      delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True );

   case 0xBF: /* MOVSXw Ew,Gv */
      if (haveF2orF3(pfx)) goto decode_failure;
      if (sz != 4 && sz != 8)
         goto decode_failure;
      delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True );

   case 0xC0: { /* XADD Gb,Eb */
      Bool decode_OK = False;
      delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta );
         goto decode_failure;

   case 0xC1: { /* XADD Gv,Ev */
      Bool decode_OK = False;
      delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta );
         goto decode_failure;
22305 case 0xC7: { /* CMPXCHG8B Ev, CMPXCHG16B Ev */
22306 IRType elemTy
= sz
==4 ? Ity_I32
: Ity_I64
;
22307 IRTemp expdHi
= newTemp(elemTy
);
22308 IRTemp expdLo
= newTemp(elemTy
);
22309 IRTemp dataHi
= newTemp(elemTy
);
22310 IRTemp dataLo
= newTemp(elemTy
);
22311 IRTemp oldHi
= newTemp(elemTy
);
22312 IRTemp oldLo
= newTemp(elemTy
);
22313 IRTemp flags_old
= newTemp(Ity_I64
);
22314 IRTemp flags_new
= newTemp(Ity_I64
);
22315 IRTemp success
= newTemp(Ity_I1
);
22316 IROp opOR
= sz
==4 ? Iop_Or32
: Iop_Or64
;
22317 IROp opXOR
= sz
==4 ? Iop_Xor32
: Iop_Xor64
;
      IROp   opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64;
      IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0);
      IRTemp expdHi64 = newTemp(Ity_I64);
      IRTemp expdLo64 = newTemp(Ity_I64);

      /* Translate this using a DCAS, even if there is no LOCK
         prefix.  Life is too short to bother with generating two
         different translations for the with/without-LOCK-prefix
         cases. */
      *expect_CAS = True;

      /* Decode, and generate address. */
      if (have66(pfx)) goto decode_failure;
      if (sz != 4 && sz != 8) goto decode_failure;
      if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16))
         goto decode_failure;
      modrm = getUChar(delta);
      if (epartIsReg(modrm)) goto decode_failure;
      if (gregLO3ofRM(modrm) != 1) goto decode_failure;
      if (haveF2orF3(pfx)) {
         /* Since the e-part is memory only, F2 or F3 (one or the
            other) is acceptable if LOCK is also present.  But only
            then. */
         if (sz == 8) goto decode_failure;
         if (haveF2andF3(pfx) || !haveLOCK(pfx)) goto decode_failure;
      }
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );

      /* cmpxchg16b requires an alignment check. */
      if (sz == 8)
         gen_SEGV_if_not_16_aligned( addr );

      /* Get the expected and new values. */
      assign( expdHi64, getIReg64(R_RDX) );
      assign( expdLo64, getIReg64(R_RAX) );

      /* These are the correctly-sized expected and new values.
         However, we also get expdHi64/expdLo64 above as 64-bits
         regardless, because we will need them later in the 32-bit
         case (paradoxically). */
      assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64))
                            : mkexpr(expdHi64) );
      assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64))
                            : mkexpr(expdLo64) );
      assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) );
      assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) );

      /* Do the DCAS. */
      stmt( IRStmt_CAS(
               mkIRCAS( oldHi, oldLo,
                        Iend_LE, mkexpr(addr),
                        mkexpr(expdHi), mkexpr(expdLo),
                        mkexpr(dataHi), mkexpr(dataLo)
            )));

      /* success when oldHi:oldLo == expdHi:expdLo */
      assign( success,
              binop(opCasCmpEQ,
                    binop(opOR,
                          binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)),
                          binop(opXOR, mkexpr(oldLo), mkexpr(expdLo))),
                    zero ));

      /* If the DCAS is successful, that is to say oldHi:oldLo ==
         expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
         which is where they came from originally.  Both the actual
         contents of these two regs, and any shadow values, are
         unchanged.  If the DCAS fails then we're putting into
         RDX:RAX the value seen in memory. */
      /* Now of course there's a complication in the 32-bit case
         (bah!): if the DCAS succeeds, we need to leave RDX:RAX
         unchanged; but if we use the same scheme as in the 64-bit
         case, we get hit by the standard rule that a write to the
         bottom 32 bits of an integer register zeros the upper 32
         bits.  And so the upper halves of RDX and RAX mysteriously
         become zero.  So we have to stuff back in the original
         64-bit values which we previously stashed in
         expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
      /* It's just _so_ much fun ... */
      putIRegRDX( 8,
                  IRExpr_ITE( mkexpr(success),
                              mkexpr(expdHi64),
                              sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi))
                                      : mkexpr(oldHi)
                ));
      putIRegRAX( 8,
                  IRExpr_ITE( mkexpr(success),
                              mkexpr(expdLo64),
                              sz == 4 ? unop(Iop_32Uto64, mkexpr(oldLo))
                                      : mkexpr(oldLo)
                ));

      /* Copy the success bit into the Z flag and leave the others
         unchanged. */
      assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all()));
      assign( flags_new,
              binop(Iop_Or64,
                    binop(Iop_And64, mkexpr(flags_old),
                                     mkU64(~AMD64G_CC_MASK_Z)),
                    binop(Iop_Shl64,
                          binop(Iop_And64,
                                unop(Iop_1Uto64, mkexpr(success)), mkU64(1)),
                          mkU8(AMD64G_CC_SHIFT_Z)) ));

      stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
      stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
      /* Set NDEP even though it isn't used.  This makes
         redundant-PUT elimination of previous stores to this field
         work better. */
      stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

      /* Sheesh.  Aren't you glad it was me and not you that had to
         write and validate all this grunge? */

      DIP("cmpxchg8b %s\n", dis_buf);
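      /* Editor's sketch (illustrative, not from the original source): the
         net guest-visible effect built above is roughly
            if (RDX:RAX == mem) { mem = RCX:RBX;  ZF = 1; }
            else                { RDX:RAX = mem;  ZF = 0; }
         and, as the comment above explains, the success path of cmpxchg8b
         writes back the stashed 64-bit originals expdHi64:expdLo64 so the
         upper halves of RDX/RAX are not clobbered by the usual rule that a
         32-bit register write zeroes bits 63:32. */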
   case 0xC8: /* BSWAP %eax */
   case 0xCF: /* BSWAP %edi */
      if (haveF2orF3(pfx)) goto decode_failure;
      /* According to the AMD64 docs, this insn can have size 4 or 8. */
      if (sz == 4) {
         t1 = newTemp(Ity_I32);
         assign( t1, getIRegRexB(4, pfx, opc-0xC8) );
         t2 = math_BSWAP( t1, Ity_I32 );
         putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2));
         DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8));
      }
      else if (sz == 8) {
         t1 = newTemp(Ity_I64);
         t2 = newTemp(Ity_I64);
         assign( t1, getIRegRexB(8, pfx, opc-0xC8) );
         t2 = math_BSWAP( t1, Ity_I64 );
         putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2));
         DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8));
      }
      else
         goto decode_failure;

   } /* first switch */
   /* =-=-=-=-=-=-=-=-= MMXery =-=-=-=-=-=-=-=-= */
   /* In the second switch, pick off MMX insns. */

   if (!have66orF2orF3(pfx)) {
      /* So there's no SIMD prefix. */

      vassert(sz == 4 || sz == 8);

      switch (opc) { /* second switch */

      case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
      case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
      case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
      case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
      case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
      case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
      {
         Bool decode_OK = False;
         delta = dis_MMX ( &decode_OK, vbi, pfx, sz, deltaIN );
         if (!decode_OK)
            goto decode_failure;
      }

      } /* second switch */
   }

   /* A couple of MMX corner cases */
   if (opc == 0x0E/* FEMMS */ || opc == 0x77/* EMMS */) {
      if (sz != 4)
         goto decode_failure;
      do_EMMS_preamble();
   /* =-=-=-=-=-=-=-=-= SSE2ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's an SSE or SSE2 instruction.  We can try this
      without checking the guest hwcaps because SSE2 is a baseline
      facility in 64 bit mode. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F__SSE2 ( &decode_OK,
                                 archinfo, vbi, pfx, sz, deltaIN, dres );
      if (decode_OK)
         return delta;
   }

   /* =-=-=-=-=-=-=-=-= SSE3ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's a SSE3 instruction.  FIXME: check guest hwcaps
      first. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F__SSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

   /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's a SSE4 instruction.  FIXME: check guest hwcaps
      first. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F__SSE4 ( &decode_OK,
                                 archinfo, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

  decode_failure:
   return deltaIN; /* fail */
}
/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level post-escape decoders: dis_ESC_0F38         ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

__attribute__((noinline))
static
Long dis_ESC_0F38 (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
        void*        callback_opaque,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  vbi,
        Prefix pfx, Int sz, Long deltaIN
     )
{
   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;

   switch (opc) {

   case 0xF0:   /* 0F 38 F0 = MOVBE m16/32/64(E), r16/32/64(G) */
   case 0xF1: { /* 0F 38 F1 = MOVBE r16/32/64(G), m16/32/64(E) */
      if (!haveF2orF3(pfx) && !haveVEX(pfx)
          && (sz == 2 || sz == 4 || sz == 8)) {
         IRTemp addr  = IRTemp_INVALID;
         modrm = getUChar(delta);
         if (epartIsReg(modrm)) break;
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         IRType ty  = szToITy(sz);
         IRTemp src = newTemp(ty);
         if (opc == 0xF0) { /* LOAD */
            assign(src, loadLE(ty, mkexpr(addr)));
            IRTemp dst = math_BSWAP(src, ty);
            putIRegG(sz, pfx, modrm, mkexpr(dst));
            DIP("movbe %s,%s\n", dis_buf, nameIRegG(sz, pfx, modrm));
         } else { /* STORE */
            assign(src, getIRegG(sz, pfx, modrm));
            IRTemp dst = math_BSWAP(src, ty);
            storeLE(mkexpr(addr), mkexpr(dst));
            DIP("movbe %s,%s\n", nameIRegG(sz, pfx, modrm), dis_buf);
         }
         return delta;
      }
      /* else fall through; maybe one of the decoders below knows what
         to do with it. */
      break;
   }

   default:
      break;
   }

   /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's an SSSE3 instruction.  FIXME: consult guest hwcaps
      rather than proceeding indiscriminately. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F38__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

   /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's an SSE4 instruction.  FIXME: consult guest hwcaps
      rather than proceeding indiscriminately. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F38__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

   /* Ignore previous decode attempts and restart from the beginning of
      the instruction. */
   delta = deltaIN;
   opc   = getUChar(delta);
   delta++;

   switch (opc) {

   case 0xF6: {
      /* 66 0F 38 F6 = ADCX r32/64(G), m32/64(E) */
      /* F3 0F 38 F6 = ADOX r32/64(G), m32/64(E) */
      /* These were introduced in Broadwell.  Gate them on AVX so as to at
         least reject them on earlier guests.  Has no host requirements. */
      if (have66noF2noF3(pfx) && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
         sz = 4; /* 66 prefix but operand size is 4/8 */
         delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagCarryX, True,
                               sz, delta, "adcx" );
         return delta;
      }
      if (haveF3no66noF2(pfx) && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
         delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagOverX, True,
                               sz, delta, "adox" );
         return delta;
      }
      /* else fall through */
      break;
   }

   default:
      break;
   }

  /*decode_failure:*/
   return deltaIN; /* fail */
}
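/* Illustrative note (added by the editor; not in the original): ADCX and
   ADOX are both plain adds with a carry-in, differing only in which flag
   carries the chain.  Roughly:
      adcx G,E:  G = G + E + CF;  CF = carry-out  (other flags untouched)
      adox G,E:  G = G + E + OF;  OF = carry-out  (other flags untouched)
   which is why both are routed through dis_op2_E_G with Iop_Add8 plus the
   WithFlagCarryX / WithFlagOverX annotation above. */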
/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level post-escape decoders: dis_ESC_0F3A         ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

__attribute__((noinline))
static
Long dis_ESC_0F3A (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
        void*        callback_opaque,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  vbi,
        Prefix pfx, Int sz, Long deltaIN
     )
{
   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);

   /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's an SSSE3 instruction.  FIXME: consult guest hwcaps
      rather than proceeding indiscriminately. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F3A__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

   /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
   /* Perhaps it's an SSE4 instruction.  FIXME: consult guest hwcaps
      rather than proceeding indiscriminately. */
   {
      Bool decode_OK = False;
      delta = dis_ESC_0F3A__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
      if (decode_OK)
         return delta;
   }

   return deltaIN; /* fail */
}
/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level post-escape decoders: dis_ESC_0F__VEX      ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/

/* FIXME: common up with the _256_ version below? */
static
Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG (
        /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
        Prefix pfx, Long delta, const HChar* name,
        /* The actual operation.  Use either 'op' or 'opfn'. */
        IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
        Bool invertLeftArg,
        Bool swapArgs
     )
{
   UChar  modrm = getUChar(delta);
   UInt   rD    = gregOfRexRM(pfx, modrm);
   UInt   rSL   = getVexNvvvv(pfx);
   IRTemp tSL   = newTemp(Ity_V128);
   IRTemp tSR   = newTemp(Ity_V128);
   IRTemp addr  = IRTemp_INVALID;

   vassert(0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*WIG?*/);

   assign(tSL, invertLeftArg ? unop(Iop_NotV128, getXMMReg(rSL))
                             : getXMMReg(rSL));

   if (epartIsReg(modrm)) {
      UInt rSR = eregOfRexRM(pfx, modrm);
      assign(tSR, getXMMReg(rSR));
      DIP("%s %s,%s,%s\n",
          name, nameXMMReg(rSR), nameXMMReg(rSL), nameXMMReg(rD));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(tSR, loadLE(Ity_V128, mkexpr(addr)));
      DIP("%s %s,%s,%s\n",
          name, dis_buf, nameXMMReg(rSL), nameXMMReg(rD));
   }

   IRTemp res = IRTemp_INVALID;
   if (op != Iop_INVALID) {
      vassert(opFn == NULL);
      res = newTemp(Ity_V128);
      if (requiresRMode(op)) {
         IRTemp rm = newTemp(Ity_I32);
         assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
         assign(res, swapArgs
                        ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
                        : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
      } else {
         assign(res, swapArgs
                        ? binop(op, mkexpr(tSR), mkexpr(tSL))
                        : binop(op, mkexpr(tSL), mkexpr(tSR)));
      }
   } else {
      vassert(opFn != NULL);
      res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
   }

   putYMMRegLoAndZU(rD, mkexpr(res));

   *uses_vvvv = True;
   return delta;
}
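/* Illustrative note (added by the editor; not in the original):
   putYMMRegLoAndZU writes the 128-bit result to the low half of the
   destination YMM register and zeroes the upper half, matching the VEX.128
   rule that bits 255:128 of the destination are cleared.  E.g. for a
   128-bit VPAND the guest state ends up as
      YMM_dst[127:0]   = result
      YMM_dst[255:128] = 0
   whereas legacy (non-VEX) SSE encodings leave the upper half untouched. */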
/* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, with a simple IROp
   for the operation, no inversion of the left arg, and no swapping of
   args. */
static
Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple (
        /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
        Prefix pfx, Long delta, const HChar* name,
        IROp op
     )
{
   return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
             uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
}

/* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, using the given IR
   generator to compute the result, no inversion of the left
   arg, and no swapping of args. */
static
Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex (
        /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
        Prefix pfx, Long delta, const HChar* name,
        IRTemp(*opFn)(IRTemp,IRTemp)
     )
{
   return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
             uses_vvvv, vbi, pfx, delta, name,
             Iop_INVALID, opFn, False, False );
}
/* Vector by scalar shift of V by the amount specified at the bottom
   of E. */
static ULong dis_AVX128_shiftV_byE ( const VexAbiInfo* vbi,
                                     Prefix pfx, Long delta,
                                     const HChar* opname, IROp op )
{
   Int    size  = 0;
   Bool   shl, shr, sar;
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   UInt   rV    = getVexNvvvv(pfx);
   IRTemp g0    = newTemp(Ity_V128);
   IRTemp g1    = newTemp(Ity_V128);
   IRTemp amt   = newTemp(Ity_I64);
   IRTemp amt8  = newTemp(Ity_I8);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( amt, getXMMRegLane64(rE, 0) );
      DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
          nameXMMReg(rV), nameXMMReg(rG) );
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
      DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
   }
   assign( g0,   getXMMReg(rV) );
   assign( amt8, unop(Iop_64to8, mkexpr(amt)) );

   shl = shr = sar = False;
   switch (op) {
      case Iop_ShlN16x8: shl = True; size = 32; break;
      case Iop_ShlN32x4: shl = True; size = 32; break;
      case Iop_ShlN64x2: shl = True; size = 64; break;
      case Iop_SarN16x8: sar = True; size = 16; break;
      case Iop_SarN32x4: sar = True; size = 32; break;
      case Iop_ShrN16x8: shr = True; size = 16; break;
      case Iop_ShrN32x4: shr = True; size = 32; break;
      case Iop_ShrN64x2: shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
      assign(
         g1,
         IRExpr_ITE(
            binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
            binop(op, mkexpr(g0), mkexpr(amt8)),
            mkV128(0x0000)
         )
      );
   } else
   if (sar) {
      assign(
         g1,
         IRExpr_ITE(
            binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
            binop(op, mkexpr(g0), mkexpr(amt8)),
            binop(op, mkexpr(g0), mkU8(size-1))
         )
      );
   } else {
      vassert(0);
   }

   putYMMRegLoAndZU( rG, mkexpr(g1) );
   return delta;
}
/* Vector by scalar shift of V by the amount specified at the bottom
   of E. */
static ULong dis_AVX256_shiftV_byE ( const VexAbiInfo* vbi,
                                     Prefix pfx, Long delta,
                                     const HChar* opname, IROp op )
{
   Int    size  = 0;
   Bool   shl, shr, sar;
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   UInt   rV    = getVexNvvvv(pfx);
   IRTemp g0    = newTemp(Ity_V256);
   IRTemp g1    = newTemp(Ity_V256);
   IRTemp amt   = newTemp(Ity_I64);
   IRTemp amt8  = newTemp(Ity_I8);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( amt, getXMMRegLane64(rE, 0) );
      DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
          nameYMMReg(rV), nameYMMReg(rG) );
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
      DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
   }
   assign( g0,   getYMMReg(rV) );
   assign( amt8, unop(Iop_64to8, mkexpr(amt)) );

   shl = shr = sar = False;
   switch (op) {
      case Iop_ShlN16x16: shl = True; size = 32; break;
      case Iop_ShlN32x8:  shl = True; size = 32; break;
      case Iop_ShlN64x4:  shl = True; size = 64; break;
      case Iop_SarN16x16: sar = True; size = 16; break;
      case Iop_SarN32x8:  sar = True; size = 32; break;
      case Iop_ShrN16x16: shr = True; size = 16; break;
      case Iop_ShrN32x8:  shr = True; size = 32; break;
      case Iop_ShrN64x4:  shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
      assign(
         g1,
         IRExpr_ITE(
            binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
            binop(op, mkexpr(g0), mkexpr(amt8)),
            binop(Iop_V128HLtoV256, mkV128(0), mkV128(0))
         )
      );
   } else
   if (sar) {
      assign(
         g1,
         IRExpr_ITE(
            binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
            binop(op, mkexpr(g0), mkexpr(amt8)),
            binop(op, mkexpr(g0), mkU8(size-1))
         )
      );
   } else {
      vassert(0);
   }

   putYMMReg( rG, mkexpr(g1) );
   return delta;
}
/* Vector by vector shift of V by the amount specified at the bottom
   of E.  Vector by vector shifts are defined for all shift amounts,
   so not using Iop_S*x* here (and SSE2 doesn't support variable shifts
   anyway). */
static ULong dis_AVX_var_shiftV_byE ( const VexAbiInfo* vbi,
                                      Prefix pfx, Long delta,
                                      const HChar* opname, IROp op, Bool isYMM )
{
   Int    i, size = 0;
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   UInt   rV    = getVexNvvvv(pfx);
   IRTemp sV    = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128);
   IRTemp amt   = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128);
   IRTemp amts[8], sVs[8], res[8];
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( amt, isYMM ? getYMMReg(rE) : getXMMReg(rE) );
      if (isYMM) {
         DIP("%s %s,%s,%s\n", opname, nameYMMReg(rE),
             nameYMMReg(rV), nameYMMReg(rG) );
      } else {
         DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
             nameXMMReg(rV), nameXMMReg(rG) );
      }
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( amt, loadLE(isYMM ? Ity_V256 : Ity_V128, mkexpr(addr)) );
      if (isYMM) {
         DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV),
             nameYMMReg(rG) );
      } else {
         DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV),
             nameXMMReg(rG) );
      }
   }
   assign( sV, isYMM ? getYMMReg(rV) : getXMMReg(rV) );

   switch (op) {
      case Iop_Shl32: size = 32; break;
      case Iop_Shl64: size = 64; break;
      case Iop_Sar32: size = 32; break;
      case Iop_Shr32: size = 32; break;
      case Iop_Shr64: size = 64; break;
      default: vassert(0);
   }

   for (i = 0; i < 8; i++) {
      sVs[i]  = IRTemp_INVALID;
      amts[i] = IRTemp_INVALID;
   }
   switch (size) {
      case 32:
         if (isYMM) {
            breakupV256to32s( sV,  &sVs[7],  &sVs[6],  &sVs[5],  &sVs[4],
                                   &sVs[3],  &sVs[2],  &sVs[1],  &sVs[0] );
            breakupV256to32s( amt, &amts[7], &amts[6], &amts[5], &amts[4],
                                   &amts[3], &amts[2], &amts[1], &amts[0] );
         } else {
            breakupV128to32s( sV,  &sVs[3],  &sVs[2],  &sVs[1],  &sVs[0] );
            breakupV128to32s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
         }
         break;
      case 64:
         if (isYMM) {
            breakupV256to64s( sV,  &sVs[3],  &sVs[2],  &sVs[1],  &sVs[0] );
            breakupV256to64s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
         } else {
            breakupV128to64s( sV,  &sVs[1],  &sVs[0] );
            breakupV128to64s( amt, &amts[1], &amts[0] );
         }
         break;
      default: vassert(0);
   }
   for (i = 0; i < 8; i++)
      if (sVs[i] != IRTemp_INVALID) {
         res[i] = size == 32 ? newTemp(Ity_I32) : newTemp(Ity_I64);
         assign( res[i],
                 IRExpr_ITE(
                    binop(size == 32 ? Iop_CmpLT32U : Iop_CmpLT64U,
                          mkexpr(amts[i]),
                          size == 32 ? mkU32(size) : mkU64(size)),
                    binop(op, mkexpr(sVs[i]),
                              unop(size == 32 ? Iop_32to8 : Iop_64to8,
                                   mkexpr(amts[i]))),
                    op == Iop_Sar32 ? binop(op, mkexpr(sVs[i]), mkU8(size-1))
                                    : size == 32 ? mkU32(0) : mkU64(0)
                 ));
      }
   switch (size) {
      case 32:
         for (i = 0; i < 8; i++)
            putYMMRegLane32( rG, i, (i < 4 || isYMM)
                                       ? mkexpr(res[i]) : mkU32(0) );
         break;
      case 64:
         for (i = 0; i < 4; i++)
            putYMMRegLane64( rG, i, (i < 2 || isYMM)
                                       ? mkexpr(res[i]) : mkU64(0) );
         break;
      default: vassert(0);
   }
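/* Illustrative note (added by the editor; not in the original): for these
   variable (per-lane) shifts an out-of-range amount does not wrap.  As the
   ITE above encodes, each lane becomes
      amt <  lane-size : lane shifted by amt
      amt >= lane-size : 0 for the logical shifts, or all copies of the
                         sign bit for the arithmetic (Iop_Sar32) case,
   which is why the arithmetic fallback is a shift by (size-1). */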
/* Vector by scalar shift of E into V, by an immediate byte.  Modified
   version of dis_SSE_shiftE_imm. */
static
Long dis_AVX128_shiftE_to_V_imm( Prefix pfx,
                                 Long delta, const HChar* opname, IROp op )
{
   Bool    shl, shr, sar;
   UChar   rm   = getUChar(delta);
   IRTemp  e0   = newTemp(Ity_V128);
   IRTemp  e1   = newTemp(Ity_V128);
   UInt    rD   = getVexNvvvv(pfx);
   UInt    amt, size = 0;
   vassert(epartIsReg(rm));
   vassert(gregLO3ofRM(rm) == 2
           || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
   amt = getUChar(delta+1);
   DIP("%s $%d,%s,%s\n", opname,
       (Int)amt,
       nameXMMReg(eregOfRexRM(pfx,rm)),
       nameXMMReg(rD));
   assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );

   shl = shr = sar = False;
   switch (op) {
      case Iop_ShlN16x8: shl = True; size = 16; break;
      case Iop_ShlN32x4: shl = True; size = 32; break;
      case Iop_ShlN64x2: shl = True; size = 64; break;
      case Iop_SarN16x8: sar = True; size = 16; break;
      case Iop_SarN32x4: sar = True; size = 32; break;
      case Iop_ShrN16x8: shr = True; size = 16; break;
      case Iop_ShrN32x4: shr = True; size = 32; break;
      case Iop_ShrN64x2: shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
      assign( e1, amt >= size
                     ? mkV128(0x0000)
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else
   if (sar) {
      assign( e1, amt >= size
                     ? binop(op, mkexpr(e0), mkU8(size-1))
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else {
      vassert(0);
   }

   putYMMRegLoAndZU( rD, mkexpr(e1) );
   return delta;
}
/* Vector by scalar shift of E into V, by an immediate byte.  Modified
   version of dis_AVX128_shiftE_to_V_imm. */
static
Long dis_AVX256_shiftE_to_V_imm( Prefix pfx,
                                 Long delta, const HChar* opname, IROp op )
{
   Bool    shl, shr, sar;
   UChar   rm   = getUChar(delta);
   IRTemp  e0   = newTemp(Ity_V256);
   IRTemp  e1   = newTemp(Ity_V256);
   UInt    rD   = getVexNvvvv(pfx);
   UInt    amt, size = 0;
   vassert(epartIsReg(rm));
   vassert(gregLO3ofRM(rm) == 2
           || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
   amt = getUChar(delta+1);
   DIP("%s $%d,%s,%s\n", opname,
       (Int)amt,
       nameYMMReg(eregOfRexRM(pfx,rm)),
       nameYMMReg(rD));
   assign( e0, getYMMReg(eregOfRexRM(pfx,rm)) );

   shl = shr = sar = False;
   switch (op) {
      case Iop_ShlN16x16: shl = True; size = 16; break;
      case Iop_ShlN32x8:  shl = True; size = 32; break;
      case Iop_ShlN64x4:  shl = True; size = 64; break;
      case Iop_SarN16x16: sar = True; size = 16; break;
      case Iop_SarN32x8:  sar = True; size = 32; break;
      case Iop_ShrN16x16: shr = True; size = 16; break;
      case Iop_ShrN32x8:  shr = True; size = 32; break;
      case Iop_ShrN64x4:  shr = True; size = 64; break;
      default: vassert(0);
   }

   if (shl || shr) {
      assign( e1, amt >= size
                     ? binop(Iop_V128HLtoV256, mkV128(0), mkV128(0))
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else
   if (sar) {
      assign( e1, amt >= size
                     ? binop(op, mkexpr(e0), mkU8(size-1))
                     : binop(op, mkexpr(e0), mkU8(amt))
      );
   } else {
      vassert(0);
   }

   putYMMReg( rD, mkexpr(e1) );
   return delta;
}
/* Lower 64-bit lane only AVX128 binary operation:
      G[63:0]    = V[63:0] `op` E[63:0]
      G[127:64]  = V[127:64]
      G[255:128] = 0.
   The specified op must be of the 64F0x2 kind, so that it
   copies the upper half of the left operand to the result.
*/
static Long dis_AVX128_E_V_to_G_lo64 ( /*OUT*/Bool* uses_vvvv,
                                       const VexAbiInfo* vbi,
                                       Prefix pfx, Long delta,
                                       const HChar* opname, IROp op )
{
   UChar   rm    = getUChar(delta);
   UInt    rG    = gregOfRexRM(pfx,rm);
   UInt    rV    = getVexNvvvv(pfx);
   IRExpr* vpart = getXMMReg(rV);
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
      DIP("%s %s,%s,%s\n", opname,
          nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
   } else {
      /* We can only do a 64-bit memory read, so the upper half of the
         E operand needs to be made simply of zeroes. */
      IRTemp epart = newTemp(Ity_V128);
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( epart, unop( Iop_64UtoV128,
                           loadLE(Ity_I64, mkexpr(addr))) );
      putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
      DIP("%s %s,%s,%s\n", opname,
          dis_buf, nameXMMReg(rV), nameXMMReg(rG));
      delta = delta+alen;
   }
   putYMMRegLane128( rG, 1, mkV128(0) );
   *uses_vvvv = True;
   return delta;
}
/* Lower 64-bit lane only AVX128 unary operation:
      G[63:0]    = op(E[63:0])
      G[127:64]  = V[127:64]
      G[255:128] = 0.
   The specified op must be of the 64F0x2 kind, so that it
   copies the upper half of the operand to the result.
*/
static Long dis_AVX128_E_V_to_G_lo64_unary ( /*OUT*/Bool* uses_vvvv,
                                             const VexAbiInfo* vbi,
                                             Prefix pfx, Long delta,
                                             const HChar* opname, IROp op )
{
   UChar  rm  = getUChar(delta);
   UInt   rG  = gregOfRexRM(pfx,rm);
   UInt   rV  = getVexNvvvv(pfx);
   IRTemp e64 = newTemp(Ity_I64);

   /* Fetch E[63:0] */
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      assign(e64, getXMMRegLane64(rE, 0));
      DIP("%s %s,%s,%s\n", opname,
          nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(e64, loadLE(Ity_I64, mkexpr(addr)));
      DIP("%s %s,%s,%s\n", opname,
          dis_buf, nameXMMReg(rV), nameXMMReg(rG));
   }

   /* Create a value 'arg' as V[127:64]++E[63:0] */
   IRTemp arg = newTemp(Ity_V128);
   assign(arg,
          binop(Iop_SetV128lo64,
                getXMMReg(rV), mkexpr(e64)));
   /* and apply op to it */
   putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
   *uses_vvvv = True;
   return delta;
}
/* Lower 32-bit lane only AVX128 unary operation:
      G[31:0]    = op(E[31:0])
      G[127:32]  = V[127:32]
      G[255:128] = 0.
   The specified op must be of the 32F0x4 kind, so that it
   copies the upper 3/4 of the operand to the result.
*/
static Long dis_AVX128_E_V_to_G_lo32_unary ( /*OUT*/Bool* uses_vvvv,
                                             const VexAbiInfo* vbi,
                                             Prefix pfx, Long delta,
                                             const HChar* opname, IROp op )
{
   UChar  rm  = getUChar(delta);
   UInt   rG  = gregOfRexRM(pfx,rm);
   UInt   rV  = getVexNvvvv(pfx);
   IRTemp e32 = newTemp(Ity_I32);

   /* Fetch E[31:0] */
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      assign(e32, getXMMRegLane32(rE, 0));
      DIP("%s %s,%s,%s\n", opname,
          nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(e32, loadLE(Ity_I32, mkexpr(addr)));
      DIP("%s %s,%s,%s\n", opname,
          dis_buf, nameXMMReg(rV), nameXMMReg(rG));
   }

   /* Create a value 'arg' as V[127:32]++E[31:0] */
   IRTemp arg = newTemp(Ity_V128);
   assign(arg,
          binop(Iop_SetV128lo32,
                getXMMReg(rV), mkexpr(e32)));
   /* and apply op to it */
   putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
   *uses_vvvv = True;
   return delta;
}
/* Lower 32-bit lane only AVX128 binary operation:
      G[31:0]    = V[31:0] `op` E[31:0]
      G[127:32]  = V[127:32]
      G[255:128] = 0.
   The specified op must be of the 32F0x4 kind, so that it
   copies the upper 3/4 of the left operand to the result.
*/
static Long dis_AVX128_E_V_to_G_lo32 ( /*OUT*/Bool* uses_vvvv,
                                       const VexAbiInfo* vbi,
                                       Prefix pfx, Long delta,
                                       const HChar* opname, IROp op )
{
   UChar   rm    = getUChar(delta);
   UInt    rG    = gregOfRexRM(pfx,rm);
   UInt    rV    = getVexNvvvv(pfx);
   IRExpr* vpart = getXMMReg(rV);
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
      DIP("%s %s,%s,%s\n", opname,
          nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
   } else {
      /* We can only do a 32-bit memory read, so the upper 3/4 of the
         E operand needs to be made simply of zeroes. */
      IRTemp epart = newTemp(Ity_V128);
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( epart, unop( Iop_32UtoV128,
                           loadLE(Ity_I32, mkexpr(addr))) );
      putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
      DIP("%s %s,%s,%s\n", opname,
          dis_buf, nameXMMReg(rV), nameXMMReg(rG));
      delta = delta+alen;
   }
   putYMMRegLane128( rG, 1, mkV128(0) );
   *uses_vvvv = True;
   return delta;
}
/* All-lanes AVX128 binary operation:
      G[127:0]   = V[127:0] `op` E[127:0]
      G[255:128] = 0.
*/
static Long dis_AVX128_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
                                  const VexAbiInfo* vbi,
                                  Prefix pfx, Long delta,
                                  const HChar* opname, IROp op )
{
   return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
             uses_vvvv, vbi, pfx, delta, opname, op,
             NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
   );
}

/* Handles AVX128 32F/64F comparisons.  A derivative of
   dis_SSEcmp_E_to_G.  It can fail, in which case it returns the
   original delta to indicate failure. */
static
Long dis_AVX128_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
                               const VexAbiInfo* vbi,
                               Prefix pfx, Long delta,
                               const HChar* opname, Bool all_lanes, Int sz )
{
   vassert(sz == 4 || sz == 8);
   Long    deltaIN = delta;
   UInt    imm8;
   Bool    preSwap = False;
   IROp    op      = Iop_INVALID;
   Bool    postNot = False;
   IRTemp  plain   = newTemp(Ity_V128);
   UChar   rm      = getUChar(delta);
   UInt    rG      = gregOfRexRM(pfx, rm);
   UInt    rV      = getVexNvvvv(pfx);
   IRTemp  argL    = newTemp(Ity_V128);
   IRTemp  argR    = newTemp(Ity_V128);

   assign(argL, getXMMReg(rV));
   if (epartIsReg(rm)) {
      imm8 = getUChar(delta+1);
      Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
      if (!ok) return deltaIN; /* FAIL */
      UInt rE = eregOfRexRM(pfx,rm);
      assign(argR, getXMMReg(rE));
      DIP("%s $%u,%s,%s,%s\n",
          opname, imm8,
          nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8 = getUChar(delta+alen);
      Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
      if (!ok) return deltaIN; /* FAIL */
      assign(argR,
             all_lanes  ? loadLE(Ity_V128, mkexpr(addr))
             : sz == 8  ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
             : /*sz==4*/  unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))));
      DIP("%s $%u,%s,%s,%s\n",
          opname, imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
   }

   assign(plain, preSwap ? binop(op, mkexpr(argR), mkexpr(argL))
                         : binop(op, mkexpr(argL), mkexpr(argR)));

   if (all_lanes) {
      /* This is simple: just invert the result, if necessary, and
         have done. */
      if (postNot) {
         putYMMRegLoAndZU( rG, unop(Iop_NotV128, mkexpr(plain)) );
      } else {
         putYMMRegLoAndZU( rG, mkexpr(plain) );
      }
   }
   else
   if (!preSwap) {
      /* More complex.  It's a one-lane-only, hence need to possibly
         invert only that one lane.  But at least the other lanes are
         correctly "in" the result, having been copied from the left
         operand. */
      if (postNot) {
         IRExpr* mask = mkV128(sz==4 ? 0x000F : 0x00FF);
         putYMMRegLoAndZU( rG, binop(Iop_XorV128, mkexpr(plain),
                                                  mask) );
      } else {
         putYMMRegLoAndZU( rG, mkexpr(plain) );
      }
   }
   else {
      /* This is the most complex case.  One-lane-only, but the args
         were swapped.  So we have to possibly invert the bottom lane,
         and (definitely) we have to copy the upper lane(s) from argL
         since, due to the swapping, what's currently there is from
         argR, which is not correct. */
      IRTemp res     = newTemp(Ity_V128);
      IRTemp mask    = newTemp(Ity_V128);
      IRTemp notMask = newTemp(Ity_V128);
      assign(mask,    mkV128(sz==4 ? 0x000F : 0x00FF));
      assign(notMask, mkV128(sz==4 ? 0xFFF0 : 0xFF00));
      if (postNot) {
         assign(res,
                binop(Iop_OrV128,
                      binop(Iop_AndV128,
                            unop(Iop_NotV128, mkexpr(plain)), mkexpr(mask)),
                      binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
      } else {
         assign(res,
                binop(Iop_OrV128,
                      binop(Iop_AndV128, mkexpr(plain), mkexpr(mask)),
                      binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
      }
      putYMMRegLoAndZU( rG, mkexpr(res) );
   }

   *uses_vvvv = True;
   return delta;
}
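/* Illustrative example (added by the editor; not in the original): for a
   VCMPSD with imm8 = 1 (the "less-than" predicate), findSSECmpOp selects a
   less-than comparison and the code above computes, in effect,
      G[63:0]    = (V[63:0] < E[63:0]) ? all-ones : 0
      G[127:64]  = V[127:64]
      G[255:128] = 0
   while preSwap/postNot are used to synthesise predicates (not-less-than,
   greater-than, and so on) that have no directly matching IROp. */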
/* Handles AVX256 32F/64F comparisons.  A derivative of
   dis_SSEcmp_E_to_G.  It can fail, in which case it returns the
   original delta to indicate failure. */
static
Long dis_AVX256_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
                               const VexAbiInfo* vbi,
                               Prefix pfx, Long delta,
                               const HChar* opname, Int sz )
{
   vassert(sz == 4 || sz == 8);
   Long    deltaIN = delta;
   UInt    imm8;
   Bool    preSwap = False;
   IROp    op      = Iop_INVALID;
   Bool    postNot = False;
   IRTemp  plain   = newTemp(Ity_V256);
   UChar   rm      = getUChar(delta);
   UInt    rG      = gregOfRexRM(pfx, rm);
   UInt    rV      = getVexNvvvv(pfx);
   IRTemp  argL    = newTemp(Ity_V256);
   IRTemp  argR    = newTemp(Ity_V256);
   IRTemp  argLhi  = IRTemp_INVALID;
   IRTemp  argLlo  = IRTemp_INVALID;
   IRTemp  argRhi  = IRTemp_INVALID;
   IRTemp  argRlo  = IRTemp_INVALID;

   assign(argL, getYMMReg(rV));
   if (epartIsReg(rm)) {
      imm8 = getUChar(delta+1);
      Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8,
                             True/*all_lanes*/, sz);
      if (!ok) return deltaIN; /* FAIL */
      UInt rE = eregOfRexRM(pfx,rm);
      assign(argR, getYMMReg(rE));
      DIP("%s $%u,%s,%s,%s\n",
          opname, imm8,
          nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8 = getUChar(delta+alen);
      Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8,
                             True/*all_lanes*/, sz);
      if (!ok) return deltaIN; /* FAIL */
      assign(argR, loadLE(Ity_V256, mkexpr(addr)) );
      DIP("%s $%u,%s,%s,%s\n",
          opname, imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
   }

   breakupV256toV128s( preSwap ? argR : argL, &argLhi, &argLlo );
   breakupV256toV128s( preSwap ? argL : argR, &argRhi, &argRlo );
   assign(plain, binop( Iop_V128HLtoV256,
                        binop(op, mkexpr(argLhi), mkexpr(argRhi)),
                        binop(op, mkexpr(argLlo), mkexpr(argRlo)) ) );

   /* This is simple: just invert the result, if necessary, and
      have done. */
   if (postNot) {
      putYMMReg( rG, unop(Iop_NotV256, mkexpr(plain)) );
   } else {
      putYMMReg( rG, mkexpr(plain) );
   }

   *uses_vvvv = True;
   return delta;
}
/* Handles AVX128 unary E-to-G all-lanes operations. */
static
Long dis_AVX128_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
                               const VexAbiInfo* vbi,
                               Prefix pfx, Long delta,
                               const HChar* opname,
                               IRTemp (*opFn)(IRTemp) )
{
   IRTemp res  = newTemp(Ity_V128);
   IRTemp arg  = newTemp(Ity_V128);
   UChar  rm   = getUChar(delta);
   UInt   rG   = gregOfRexRM(pfx, rm);
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      assign(arg, getXMMReg(rE));
      DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(arg, loadLE(Ity_V128, mkexpr(addr)));
      DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
   }
   res = opFn(arg);
   putYMMRegLoAndZU( rG, mkexpr(res) );
   *uses_vvvv = False;
   return delta;
}

/* Handles AVX128 unary E-to-G all-lanes operations. */
static
Long dis_AVX128_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
                                   const VexAbiInfo* vbi,
                                   Prefix pfx, Long delta,
                                   const HChar* opname, IROp op )
{
   IRTemp arg  = newTemp(Ity_V128);
   UChar  rm   = getUChar(delta);
   UInt   rG   = gregOfRexRM(pfx, rm);
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      assign(arg, getXMMReg(rE));
      DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(arg, loadLE(Ity_V128, mkexpr(addr)));
      DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
   }
   // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
   // up in the usual way.
   Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
   /* XXXROUNDINGFIXME */
   IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), mkexpr(arg))
                           : unop(op, mkexpr(arg));
   putYMMRegLoAndZU( rG, res );
   *uses_vvvv = False;
   return delta;
}
/* FIXME: common up with the _128_ version above? */
static
Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG (
        /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
        Prefix pfx, Long delta, const HChar* name,
        /* The actual operation.  Use either 'op' or 'opfn'. */
        IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
        Bool invertLeftArg,
        Bool swapArgs
     )
{
   UChar  modrm = getUChar(delta);
   UInt   rD    = gregOfRexRM(pfx, modrm);
   UInt   rSL   = getVexNvvvv(pfx);
   IRTemp tSL   = newTemp(Ity_V256);
   IRTemp tSR   = newTemp(Ity_V256);
   IRTemp addr  = IRTemp_INVALID;

   vassert(1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*WIG?*/);

   assign(tSL, invertLeftArg ? unop(Iop_NotV256, getYMMReg(rSL))
                             : getYMMReg(rSL));

   if (epartIsReg(modrm)) {
      UInt rSR = eregOfRexRM(pfx, modrm);
      assign(tSR, getYMMReg(rSR));
      DIP("%s %s,%s,%s\n",
          name, nameYMMReg(rSR), nameYMMReg(rSL), nameYMMReg(rD));
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(tSR, loadLE(Ity_V256, mkexpr(addr)));
      DIP("%s %s,%s,%s\n",
          name, dis_buf, nameYMMReg(rSL), nameYMMReg(rD));
   }

   IRTemp res = IRTemp_INVALID;
   if (op != Iop_INVALID) {
      vassert(opFn == NULL);
      res = newTemp(Ity_V256);
      if (requiresRMode(op)) {
         IRTemp rm = newTemp(Ity_I32);
         assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
         assign(res, swapArgs
                        ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
                        : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
      } else {
         assign(res, swapArgs
                        ? binop(op, mkexpr(tSR), mkexpr(tSL))
                        : binop(op, mkexpr(tSL), mkexpr(tSR)));
      }
   } else {
      vassert(opFn != NULL);
      res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
   }

   putYMMReg(rD, mkexpr(res));

   *uses_vvvv = True;
   return delta;
}

/* All-lanes AVX256 binary operation:
      G[255:0] = V[255:0] `op` E[255:0].
*/
static Long dis_AVX256_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
                                  const VexAbiInfo* vbi,
                                  Prefix pfx, Long delta,
                                  const HChar* opname, IROp op )
{
   return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
             uses_vvvv, vbi, pfx, delta, opname, op,
             NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
   );
}

/* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, with a simple IROp
   for the operation, no inversion of the left arg, and no swapping of
   args. */
static
Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple (
        /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
        Prefix pfx, Long delta, const HChar* name,
        IROp op
     )
{
   return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
             uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
}

/* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, using the given IR
   generator to compute the result, no inversion of the left
   arg, and no swapping of args. */
static
Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex (
        /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
        Prefix pfx, Long delta, const HChar* name,
        IRTemp(*opFn)(IRTemp,IRTemp)
     )
{
   return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
             uses_vvvv, vbi, pfx, delta, name,
             Iop_INVALID, opFn, False, False );
}
/* Handles AVX256 unary E-to-G all-lanes operations. */
static
Long dis_AVX256_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
                               const VexAbiInfo* vbi,
                               Prefix pfx, Long delta,
                               const HChar* opname,
                               IRTemp (*opFn)(IRTemp) )
{
   IRTemp res  = newTemp(Ity_V256);
   IRTemp arg  = newTemp(Ity_V256);
   UChar  rm   = getUChar(delta);
   UInt   rG   = gregOfRexRM(pfx, rm);
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      assign(arg, getYMMReg(rE));
      DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(arg, loadLE(Ity_V256, mkexpr(addr)));
      DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
   }
   res = opFn(arg);
   putYMMReg( rG, mkexpr(res) );
   *uses_vvvv = False;
   return delta;
}

/* Handles AVX256 unary E-to-G all-lanes operations. */
static
Long dis_AVX256_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
                                   const VexAbiInfo* vbi,
                                   Prefix pfx, Long delta,
                                   const HChar* opname, IROp op )
{
   IRTemp arg  = newTemp(Ity_V256);
   UChar  rm   = getUChar(delta);
   UInt   rG   = gregOfRexRM(pfx, rm);
   if (epartIsReg(rm)) {
      UInt rE = eregOfRexRM(pfx,rm);
      assign(arg, getYMMReg(rE));
      DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign(arg, loadLE(Ity_V256, mkexpr(addr)));
      DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
   }
   putYMMReg( rG, unop(op, mkexpr(arg)) );
   *uses_vvvv = False;
   return delta;
}
/* The use of ReinterpF64asI64 is ugly.  Surely could do better if we
   had a variant of Iop_64x4toV256 that took F64s as args instead. */
static Long dis_CVTDQ2PD_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = getUChar(delta);
   IRTemp sV    = newTemp(Ity_V128);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( sV, getXMMReg(rE) );
      DIP("vcvtdq2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
      DIP("vcvtdq2pd %s,%s\n", dis_buf, nameYMMReg(rG) );
   }
   IRTemp s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   IRExpr* res
      = IRExpr_Qop(
           Iop_64x4toV256,
           unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s3))),
           unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s2))),
           unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s1))),
           unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s0)))
        );
   putYMMReg(rG, res);
   return delta;
}

static Long dis_CVTPD2PS_256 ( const VexAbiInfo* vbi, Prefix pfx,
                               Long delta )
{
   IRTemp addr  = IRTemp_INVALID;
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   IRTemp argV  = newTemp(Ity_V256);
   IRTemp rmode = newTemp(Ity_I32);
   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx,modrm);
      assign( argV, getYMMReg(rE) );
      DIP("vcvtpd2psy %s,%s\n", nameYMMReg(rE), nameXMMReg(rG));
   } else {
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
      DIP("vcvtpd2psy %s,%s\n", dis_buf, nameXMMReg(rG) );
   }

   assign( rmode, get_sse_roundingmode() );
   IRTemp t3, t2, t1, t0;
   t3 = t2 = t1 = t0 = IRTemp_INVALID;
   breakupV256to64s( argV, &t3, &t2, &t1, &t0 );
#  define CVT(_t)  binop( Iop_F64toF32, mkexpr(rmode), \
                          unop(Iop_ReinterpI64asF64, mkexpr(_t)) )
   putXMMRegLane32F( rG, 3, CVT(t3) );
   putXMMRegLane32F( rG, 2, CVT(t2) );
   putXMMRegLane32F( rG, 1, CVT(t1) );
   putXMMRegLane32F( rG, 0, CVT(t0) );
#  undef CVT
   putYMMRegLane128( rG, 1, mkV128(0) );
   return delta;
}
static IRTemp math_VPUNPCK_YMM ( IRTemp tL, IRTemp tR, IROp op )
{
   IRTemp tLhi, tLlo, tRhi, tRlo;
   tLhi = tLlo = tRhi = tRlo = IRTemp_INVALID;
   IRTemp res = newTemp(Ity_V256);
   breakupV256toV128s( tL, &tLhi, &tLlo );
   breakupV256toV128s( tR, &tRhi, &tRlo );
   assign( res, binop( Iop_V128HLtoV256,
                       binop( op, mkexpr(tRhi), mkexpr(tLhi) ),
                       binop( op, mkexpr(tRlo), mkexpr(tLlo) ) ) );
   return res;
}

static IRTemp math_VPUNPCKLBW_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO8x16 );
}

static IRTemp math_VPUNPCKLWD_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO16x8 );
}

static IRTemp math_VPUNPCKLDQ_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO32x4 );
}

static IRTemp math_VPUNPCKLQDQ_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO64x2 );
}

static IRTemp math_VPUNPCKHBW_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI8x16 );
}

static IRTemp math_VPUNPCKHWD_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI16x8 );
}

static IRTemp math_VPUNPCKHDQ_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI32x4 );
}

static IRTemp math_VPUNPCKHQDQ_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI64x2 );
}

static IRTemp math_VPACKSSWB_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Sx16 );
}

static IRTemp math_VPACKUSWB_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Ux16 );
}

static IRTemp math_VPACKSSDW_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Sx8 );
}

static IRTemp math_VPACKUSDW_YMM ( IRTemp tL, IRTemp tR )
{
   return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Ux8 );
}
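/* Illustrative note (added by the editor; not in the original): the AVX2
   forms of the PUNPCK and PACK instructions operate on each 128-bit lane
   independently, never across the lane boundary.  math_VPUNPCK_YMM above
   mirrors that by splitting both sources into V128 halves and applying the
   128-bit IROp to the two halves separately, roughly
      result[127:0]   = op(srcR[127:0],   srcL[127:0])
      result[255:128] = op(srcR[255:128], srcL[255:128])            */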
__attribute__((noinline))
static
Long dis_ESC_0F__VEX (
        /*MB_OUT*/DisResult* dres,
        /*OUT*/   Bool*      uses_vvvv,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
        void*        callback_opaque,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  vbi,
        Prefix pfx, Int sz, Long deltaIN
     )
{
   IRTemp addr  = IRTemp_INVALID;
   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
   *uses_vvvv = False;

   switch (opc) {

   case 0x10:
      /* VMOVSD m64, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
      /* Move 64 bits from E (mem only) to G (lo half xmm).
         Bits 255-64 of the dest are zeroed out. */
      if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         UInt   rG   = gregOfRexRM(pfx,modrm);
         IRTemp z128 = newTemp(Ity_V128);
         assign(z128, mkV128(0));
         putXMMReg( rG, mkexpr(z128) );
         /* FIXME: ALIGNMENT CHECK? */
         putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
         putYMMRegLane128( rG, 1, mkexpr(z128) );
         DIP("vmovsd %s,%s\n", dis_buf, nameXMMReg(rG));
         delta += alen;
         goto decode_success;
      }
      /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
      /* Reg form. */
      if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         UInt  rE    = eregOfRexRM(pfx, modrm);
         UInt  rV    = getVexNvvvv(pfx);
         delta++;
         DIP("vmovsd %s,%s,%s\n",
             nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
         IRTemp res = newTemp(Ity_V128);
         assign(res, binop(Iop_64HLtoV128,
                           getXMMRegLane64(rV, 1),
                           getXMMRegLane64(rE, 0)));
         putYMMRegLoAndZU(rG, mkexpr(res));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VMOVSS m32, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
      /* Move 32 bits from E (mem only) to G (lo half xmm).
         Bits 255-32 of the dest are zeroed out. */
      if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         UInt   rG   = gregOfRexRM(pfx,modrm);
         IRTemp z128 = newTemp(Ity_V128);
         assign(z128, mkV128(0));
         putXMMReg( rG, mkexpr(z128) );
         /* FIXME: ALIGNMENT CHECK? */
         putXMMRegLane32( rG, 0, loadLE(Ity_I32, mkexpr(addr)) );
         putYMMRegLane128( rG, 1, mkexpr(z128) );
         DIP("vmovss %s,%s\n", dis_buf, nameXMMReg(rG));
         delta += alen;
         goto decode_success;
      }
      /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
      /* Reg form. */
      if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         UInt  rE    = eregOfRexRM(pfx, modrm);
         UInt  rV    = getVexNvvvv(pfx);
         delta++;
         DIP("vmovss %s,%s,%s\n",
             nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
         IRTemp res = newTemp(Ity_V128);
         assign( res, binop( Iop_64HLtoV128,
                             getXMMRegLane64(rV, 1),
                             binop(Iop_32HLto64,
                                   getXMMRegLane32(rV, 1),
                                   getXMMRegLane32(rE, 0)) ) );
         putYMMRegLoAndZU(rG, mkexpr(res));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VMOVUPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 10 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMRegLoAndZU( rG, getXMMReg( rE ));
            DIP("vmovupd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
            DIP("vmovupd %s,%s\n", dis_buf, nameXMMReg(rG));
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVUPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 10 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMReg( rG, getYMMReg( rE ));
            DIP("vmovupd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
            DIP("vmovupd %s,%s\n", dis_buf, nameYMMReg(rG));
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVUPS xmm2/m128, xmm1 = VEX.128.0F.WIG 10 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMRegLoAndZU( rG, getXMMReg( rE ));
            DIP("vmovups %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
            DIP("vmovups %s,%s\n", dis_buf, nameXMMReg(rG));
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVUPS ymm2/m256, ymm1 = VEX.256.0F.WIG 10 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMReg( rG, getYMMReg( rE ));
            DIP("vmovups %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
            DIP("vmovups %s,%s\n", dis_buf, nameYMMReg(rG));
            delta += alen;
         }
         goto decode_success;
      }
      break;

   case 0x11:
      /* VMOVSD xmm1, m64 = VEX.LIG.F2.0F.WIG 11 /r */
      /* Move 64 bits from G (low half xmm) to mem only. */
      if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         UInt rG = gregOfRexRM(pfx,modrm);
         /* FIXME: ALIGNMENT CHECK? */
         storeLE( mkexpr(addr), getXMMRegLane64(rG, 0));
         DIP("vmovsd %s,%s\n", nameXMMReg(rG), dis_buf);
         delta += alen;
         goto decode_success;
      }
      /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 11 /r */
      /* Reg form. */
      if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         UInt  rE    = eregOfRexRM(pfx, modrm);
         UInt  rV    = getVexNvvvv(pfx);
         delta++;
         DIP("vmovsd %s,%s,%s\n",
             nameXMMReg(rG), nameXMMReg(rV), nameXMMReg(rE));
         IRTemp res = newTemp(Ity_V128);
         assign(res, binop(Iop_64HLtoV128,
                           getXMMRegLane64(rV, 1),
                           getXMMRegLane64(rG, 0)));
         putYMMRegLoAndZU(rE, mkexpr(res));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VMOVSS xmm1, m32 = VEX.LIG.F3.0F.WIG 11 /r */
      /* Move 32 bits from G (low 1/4 xmm) to mem only. */
      if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         UInt rG = gregOfRexRM(pfx,modrm);
         /* FIXME: ALIGNMENT CHECK? */
         storeLE( mkexpr(addr), getXMMRegLane32(rG, 0));
         DIP("vmovss %s,%s\n", nameXMMReg(rG), dis_buf);
         delta += alen;
         goto decode_success;
      }
      /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 11 /r */
      /* Reg form. */
      if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         UInt  rE    = eregOfRexRM(pfx, modrm);
         UInt  rV    = getVexNvvvv(pfx);
         delta++;
         DIP("vmovss %s,%s,%s\n",
             nameXMMReg(rG), nameXMMReg(rV), nameXMMReg(rE));
         IRTemp res = newTemp(Ity_V128);
         assign( res, binop( Iop_64HLtoV128,
                             getXMMRegLane64(rV, 1),
                             binop(Iop_32HLto64,
                                   getXMMRegLane32(rV, 1),
                                   getXMMRegLane32(rG, 0)) ) );
         putYMMRegLoAndZU(rE, mkexpr(res));
         *uses_vvvv = True;
         goto decode_success;
      }
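      /* Illustrative note (added by the editor; not in the original): the
         register-to-register forms of VMOVSS/VMOVSD above are merges, not
         plain copies: the low 32 (or 64) bits come from the rightmost
         source, the rest of bits 127:0 come from the vvvv register, and
         bits 255:128 of the destination are zeroed -- hence the
         32HLto64/64HLtoV128 rebuild followed by putYMMRegLoAndZU. */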
24278 /* VMOVUPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 11 /r */
24279 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24280 UChar modrm
= getUChar(delta
);
24281 UInt rG
= gregOfRexRM(pfx
,modrm
);
24282 if (epartIsReg(modrm
)) {
24283 UInt rE
= eregOfRexRM(pfx
,modrm
);
24284 putYMMRegLoAndZU( rE
, getXMMReg(rG
) );
24285 DIP("vmovupd %s,%s\n", nameXMMReg(rG
), nameXMMReg(rE
));
24288 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24289 storeLE( mkexpr(addr
), getXMMReg(rG
) );
24290 DIP("vmovupd %s,%s\n", nameXMMReg(rG
), dis_buf
);
24293 goto decode_success
;
24295 /* VMOVUPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 11 /r */
24296 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24297 UChar modrm
= getUChar(delta
);
24298 UInt rG
= gregOfRexRM(pfx
,modrm
);
24299 if (epartIsReg(modrm
)) {
24300 UInt rE
= eregOfRexRM(pfx
,modrm
);
24301 putYMMReg( rE
, getYMMReg(rG
) );
24302 DIP("vmovupd %s,%s\n", nameYMMReg(rG
), nameYMMReg(rE
));
24305 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24306 storeLE( mkexpr(addr
), getYMMReg(rG
) );
24307 DIP("vmovupd %s,%s\n", nameYMMReg(rG
), dis_buf
);
24310 goto decode_success
;
24312 /* VMOVUPS xmm1, xmm2/m128 = VEX.128.0F.WIG 11 /r */
24313 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24314 UChar modrm
= getUChar(delta
);
24315 UInt rG
= gregOfRexRM(pfx
,modrm
);
24316 if (epartIsReg(modrm
)) {
24317 UInt rE
= eregOfRexRM(pfx
,modrm
);
24318 putYMMRegLoAndZU( rE
, getXMMReg(rG
) );
24319 DIP("vmovups %s,%s\n", nameXMMReg(rG
), nameXMMReg(rE
));
24322 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24323 storeLE( mkexpr(addr
), getXMMReg(rG
) );
24324 DIP("vmovups %s,%s\n", nameXMMReg(rG
), dis_buf
);
24327 goto decode_success
;
24329 /* VMOVUPS ymm1, ymm2/m256 = VEX.256.0F.WIG 11 /r */
24330 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24331 UChar modrm
= getUChar(delta
);
24332 UInt rG
= gregOfRexRM(pfx
,modrm
);
24333 if (epartIsReg(modrm
)) {
24334 UInt rE
= eregOfRexRM(pfx
,modrm
);
24335 putYMMReg( rE
, getYMMReg(rG
) );
24336 DIP("vmovups %s,%s\n", nameYMMReg(rG
), nameYMMReg(rE
));
24339 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24340 storeLE( mkexpr(addr
), getYMMReg(rG
) );
24341 DIP("vmovups %s,%s\n", nameYMMReg(rG
), dis_buf
);
24344 goto decode_success
;
24349 /* VMOVDDUP xmm2/m64, xmm1 = VEX.128.F2.0F.WIG /12 r */
24350 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24351 delta
= dis_MOVDDUP_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
24352 goto decode_success
;
24354 /* VMOVDDUP ymm2/m256, ymm1 = VEX.256.F2.0F.WIG /12 r */
24355 if (haveF2no66noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24356 delta
= dis_MOVDDUP_256( vbi
, pfx
, delta
);
24357 goto decode_success
;
24359 /* VMOVHLPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 12 /r */
24360 /* Insn only exists in reg form */
24361 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
24362 && epartIsReg(getUChar(delta
))) {
24363 UChar modrm
= getUChar(delta
);
24364 UInt rG
= gregOfRexRM(pfx
, modrm
);
24365 UInt rE
= eregOfRexRM(pfx
, modrm
);
24366 UInt rV
= getVexNvvvv(pfx
);
24368 DIP("vmovhlps %s,%s,%s\n",
24369 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
24370 IRTemp res
= newTemp(Ity_V128
);
24371 assign(res
, binop(Iop_64HLtoV128
,
24372 getXMMRegLane64(rV
, 1),
24373 getXMMRegLane64(rE
, 1)));
24374 putYMMRegLoAndZU(rG
, mkexpr(res
));
24376 goto decode_success
;
24378 /* VMOVLPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 12 /r */
24379 /* Insn exists only in mem form, it appears. */
24380 /* VMOVLPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 12 /r */
24381 /* Insn exists only in mem form, it appears. */
24382 if ((have66noF2noF3(pfx
) || haveNo66noF2noF3(pfx
))
24383 && 0==getVexL(pfx
)/*128*/ && !epartIsReg(getUChar(delta
))) {
24384 UChar modrm
= getUChar(delta
);
24385 UInt rG
= gregOfRexRM(pfx
, modrm
);
24386 UInt rV
= getVexNvvvv(pfx
);
24387 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24389 DIP("vmovlpd %s,%s,%s\n",
24390 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
24391 IRTemp res
= newTemp(Ity_V128
);
24392 assign(res
, binop(Iop_64HLtoV128
,
24393 getXMMRegLane64(rV
, 1),
24394 loadLE(Ity_I64
, mkexpr(addr
))));
24395 putYMMRegLoAndZU(rG
, mkexpr(res
));
24397 goto decode_success
;
24399 /* VMOVSLDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 12 /r */
24400 if (haveF3no66noF2(pfx
) && 0==getVexL(pfx
)/*128*/) {
24401 delta
= dis_MOVSxDUP_128( vbi
, pfx
, delta
, True
/*isAvx*/,
24403 goto decode_success
;
24405 /* VMOVSLDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 12 /r */
24406 if (haveF3no66noF2(pfx
) && 1==getVexL(pfx
)/*256*/) {
24407 delta
= dis_MOVSxDUP_256( vbi
, pfx
, delta
, True
/*isL*/ );
24408 goto decode_success
;
24413 /* VMOVLPS xmm1, m64 = VEX.128.0F.WIG 13 /r */
24414 /* Insn exists only in mem form, it appears. */
24415 /* VMOVLPD xmm1, m64 = VEX.128.66.0F.WIG 13 /r */
24416 /* Insn exists only in mem form, it appears. */
24417 if ((have66noF2noF3(pfx
) || haveNo66noF2noF3(pfx
))
24418 && 0==getVexL(pfx
)/*128*/ && !epartIsReg(getUChar(delta
))) {
24419 UChar modrm
= getUChar(delta
);
24420 UInt rG
= gregOfRexRM(pfx
, modrm
);
24421 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24423 storeLE( mkexpr(addr
), getXMMRegLane64( rG
, 0));
24424 DIP("vmovlpd %s,%s\n", nameXMMReg(rG
), dis_buf
);
24425 goto decode_success
;
      /* VUNPCKLPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 14 /r */
      /* VUNPCKHPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 15 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         Bool   hi    = opc == 0x15;
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx,modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp eV    = newTemp(Ity_V128);
         IRTemp vV    = newTemp(Ity_V128);
         assign( vV, getXMMReg(rV) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( eV, getXMMReg(rE) );
            delta += 1;
            DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
                nameXMMReg(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
                dis_buf, nameXMMReg(rG));
         }
         IRTemp res = math_UNPCKxPS_128( eV, vV, hi );
         putYMMRegLoAndZU( rG, mkexpr(res) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VUNPCKLPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 14 /r */
      /* VUNPCKHPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 15 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         Bool   hi    = opc == 0x15;
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx,modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp eV    = newTemp(Ity_V256);
         IRTemp vV    = newTemp(Ity_V256);
         assign( vV, getYMMReg(rV) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( eV, getYMMReg(rE) );
            delta += 1;
            DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
                nameYMMReg(rE), nameYMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
            delta += alen;
            DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
                dis_buf, nameYMMReg(rG));
         }
         IRTemp res = math_UNPCKxPS_256( eV, vV, hi );
         putYMMReg( rG, mkexpr(res) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VUNPCKLPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 14 /r */
      /* VUNPCKHPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 15 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         Bool   hi    = opc == 0x15;
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx,modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp eV    = newTemp(Ity_V128);
         IRTemp vV    = newTemp(Ity_V128);
         assign( vV, getXMMReg(rV) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( eV, getXMMReg(rE) );
            delta += 1;
            DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
                nameXMMReg(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
                dis_buf, nameXMMReg(rG));
         }
         IRTemp res = math_UNPCKxPD_128( eV, vV, hi );
         putYMMRegLoAndZU( rG, mkexpr(res) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VUNPCKLPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 14 /r */
      /* VUNPCKHPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 15 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         Bool   hi    = opc == 0x15;
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx,modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp eV    = newTemp(Ity_V256);
         IRTemp vV    = newTemp(Ity_V256);
         assign( vV, getYMMReg(rV) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( eV, getYMMReg(rE) );
            delta += 1;
            DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
                nameYMMReg(rE), nameYMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
            delta += alen;
            DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
                dis_buf, nameYMMReg(rG));
         }
         IRTemp res = math_UNPCKxPD_256( eV, vV, hi );
         putYMMReg( rG, mkexpr(res) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VMOVLHPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 16 /r */
      /* Insn only exists in reg form */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         UInt  rE    = eregOfRexRM(pfx, modrm);
         UInt  rV    = getVexNvvvv(pfx);
         delta++;
         DIP("vmovlhps %s,%s,%s\n",
             nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
         IRTemp res = newTemp(Ity_V128);
         assign(res, binop(Iop_64HLtoV128,
                           getXMMRegLane64(rE, 0),
                           getXMMRegLane64(rV, 0)));
         putYMMRegLoAndZU(rG, mkexpr(res));
         *uses_vvvv = True;
         goto decode_success;
      }
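      /* vmovlhps above is the converse of vmovhlps: result low 64 =
         rV's low half, result high 64 = rE's low half, upper YMM lane
         zeroed. */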
      /* VMOVHPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 16 /r */
      /* Insn exists only in mem form, it appears. */
      /* VMOVHPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 16 /r */
      /* Insn exists only in mem form, it appears. */
      if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
          && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         UInt  rV    = getVexNvvvv(pfx);
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         DIP("vmovhp%c %s,%s,%s\n", have66(pfx) ? 'd' : 's',
             dis_buf, nameXMMReg(rV), nameXMMReg(rG));
         IRTemp res = newTemp(Ity_V128);
         assign(res, binop(Iop_64HLtoV128,
                           loadLE(Ity_I64, mkexpr(addr)),
                           getXMMRegLane64(rV, 0)));
         putYMMRegLoAndZU(rG, mkexpr(res));
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VMOVSHDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 16 /r */
      if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
                                   False/*!isL*/ );
         goto decode_success;
      }
      /* VMOVSHDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 16 /r */
      if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_MOVSxDUP_256( vbi, pfx, delta, False/*!isL*/ );
         goto decode_success;
      }
      /* VMOVHPS xmm1, m64 = VEX.128.0F.WIG 17 /r */
      /* Insn exists only in mem form, it appears. */
      /* VMOVHPD xmm1, m64 = VEX.128.66.0F.WIG 17 /r */
      /* Insn exists only in mem form, it appears. */
      if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
          && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         storeLE( mkexpr(addr), getXMMRegLane64( rG, 1));
         DIP("vmovhp%c %s,%s\n", have66(pfx) ? 'd' : 's',
             nameXMMReg(rG), dis_buf);
         goto decode_success;
      }
      /* VMOVAPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 28 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMRegLoAndZU( rG, getXMMReg( rE ));
            DIP("vmovapd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
            DIP("vmovapd %s,%s\n", dis_buf, nameXMMReg(rG));
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVAPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 28 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMReg( rG, getYMMReg( rE ));
            DIP("vmovapd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_32_aligned( addr );
            putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
            DIP("vmovapd %s,%s\n", dis_buf, nameYMMReg(rG));
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVAPS xmm2/m128, xmm1 = VEX.128.0F.WIG 28 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMRegLoAndZU( rG, getXMMReg( rE ));
            DIP("vmovaps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
            DIP("vmovaps %s,%s\n", dis_buf, nameXMMReg(rG));
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVAPS ymm2/m256, ymm1 = VEX.256.0F.WIG 28 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx, modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMReg( rG, getYMMReg( rE ));
            DIP("vmovaps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_32_aligned( addr );
            putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
            DIP("vmovaps %s,%s\n", dis_buf, nameYMMReg(rG));
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVAPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 29 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMRegLoAndZU( rE, getXMMReg(rG) );
            DIP("vmovapd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            storeLE( mkexpr(addr), getXMMReg(rG) );
            DIP("vmovapd %s,%s\n", nameXMMReg(rG), dis_buf);
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVAPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 29 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMReg( rE, getYMMReg(rG) );
            DIP("vmovapd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
            delta += 1;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_32_aligned( addr );
            storeLE( mkexpr(addr), getYMMReg(rG) );
            DIP("vmovapd %s,%s\n", nameYMMReg(rG), dis_buf);
            delta += alen;
         }
         goto decode_success;
      }
      /* VMOVAPS xmm1, xmm2/m128 = VEX.128.0F.WIG 29 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMRegLoAndZU( rE, getXMMReg(rG) );
            DIP("vmovaps %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
            delta += 1;
            goto decode_success;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            storeLE( mkexpr(addr), getXMMReg(rG) );
            DIP("vmovaps %s,%s\n", nameXMMReg(rG), dis_buf);
            delta += alen;
            goto decode_success;
         }
      }
      /* VMOVAPS ymm1, ymm2/m256 = VEX.256.0F.WIG 29 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putYMMReg( rE, getYMMReg(rG) );
            DIP("vmovaps %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
            delta += 1;
            goto decode_success;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_32_aligned( addr );
            storeLE( mkexpr(addr), getYMMReg(rG) );
            DIP("vmovaps %s,%s\n", nameYMMReg(rG), dis_buf);
            delta += alen;
            goto decode_success;
         }
      }
      IRTemp rmode = newTemp(Ity_I32);
      assign( rmode, get_sse_roundingmode() );
      /* VCVTSI2SD r/m32, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W0 2A /r */
      if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
         UChar  modrm = getUChar(delta);
         UInt   rV    = getVexNvvvv(pfx);
         UInt   rD    = gregOfRexRM(pfx, modrm);
         IRTemp arg32 = newTemp(Ity_I32);
         if (epartIsReg(modrm)) {
            UInt rS = eregOfRexRM(pfx,modrm);
            assign( arg32, getIReg32(rS) );
            delta += 1;
            DIP("vcvtsi2sdl %s,%s,%s\n",
                nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
            delta += alen;
            DIP("vcvtsi2sdl %s,%s,%s\n",
                dis_buf, nameXMMReg(rV), nameXMMReg(rD));
         }
         putXMMRegLane64F( rD, 0,
                           unop(Iop_I32StoF64, mkexpr(arg32)));
         putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
         putYMMRegLane128( rD, 1, mkV128(0) );
         *uses_vvvv = True;
         goto decode_success;
      }
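      /* Note: the 32-bit int -> F64 conversion above is exact, so no
         rounding mode is supplied; the 64-bit variant below can lose
         precision and therefore rounds using the current SSE mode. */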
      /* VCVTSI2SD r/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W1 2A /r */
      if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
         UChar  modrm = getUChar(delta);
         UInt   rV    = getVexNvvvv(pfx);
         UInt   rD    = gregOfRexRM(pfx, modrm);
         IRTemp arg64 = newTemp(Ity_I64);
         if (epartIsReg(modrm)) {
            UInt rS = eregOfRexRM(pfx,modrm);
            assign( arg64, getIReg64(rS) );
            delta += 1;
            DIP("vcvtsi2sdq %s,%s,%s\n",
                nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("vcvtsi2sdq %s,%s,%s\n",
                dis_buf, nameXMMReg(rV), nameXMMReg(rD));
         }
         putXMMRegLane64F( rD, 0,
                           binop( Iop_I64StoF64,
                                  get_sse_roundingmode(),
                                  mkexpr(arg64)) );
         putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
         putYMMRegLane128( rD, 1, mkV128(0) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VCVTSI2SS r/m64, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W1 2A /r */
      if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
         UChar  modrm = getUChar(delta);
         UInt   rV    = getVexNvvvv(pfx);
         UInt   rD    = gregOfRexRM(pfx, modrm);
         IRTemp arg64 = newTemp(Ity_I64);
         if (epartIsReg(modrm)) {
            UInt rS = eregOfRexRM(pfx,modrm);
            assign( arg64, getIReg64(rS) );
            delta += 1;
            DIP("vcvtsi2ssq %s,%s,%s\n",
                nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
            delta += alen;
            DIP("vcvtsi2ssq %s,%s,%s\n",
                dis_buf, nameXMMReg(rV), nameXMMReg(rD));
         }
         putXMMRegLane32F( rD, 0,
                           binop(Iop_F64toF32,
                                 mkexpr(rmode),
                                 binop(Iop_I64StoF64, mkexpr(rmode),
                                       mkexpr(arg64)) ) );
         putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
         putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
         putYMMRegLane128( rD, 1, mkV128(0) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VCVTSI2SS r/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W0 2A /r */
      if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
         UChar  modrm = getUChar(delta);
         UInt   rV    = getVexNvvvv(pfx);
         UInt   rD    = gregOfRexRM(pfx, modrm);
         IRTemp arg32 = newTemp(Ity_I32);
         if (epartIsReg(modrm)) {
            UInt rS = eregOfRexRM(pfx,modrm);
            assign( arg32, getIReg32(rS) );
            delta += 1;
            DIP("vcvtsi2ssl %s,%s,%s\n",
                nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
            delta += alen;
            DIP("vcvtsi2ssl %s,%s,%s\n",
                dis_buf, nameXMMReg(rV), nameXMMReg(rD));
         }
         putXMMRegLane32F( rD, 0,
                           binop(Iop_F64toF32,
                                 mkexpr(rmode),
                                 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
         putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
         putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
         putYMMRegLane128( rD, 1, mkV128(0) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VMOVNTPD xmm1, m128 = VEX.128.66.0F.WIG 2B /r */
      /* VMOVNTPS xmm1, m128 = VEX.128.0F.WIG 2B /r */
      if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
          && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
         UChar  modrm = getUChar(delta);
         UInt   rS    = gregOfRexRM(pfx, modrm);
         IRTemp tS    = newTemp(Ity_V128);
         assign(tS, getXMMReg(rS));
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         gen_SEGV_if_not_16_aligned(addr);
         storeLE(mkexpr(addr), mkexpr(tS));
         DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
             nameXMMReg(rS), dis_buf);
         goto decode_success;
      }
      /* VMOVNTPD ymm1, m256 = VEX.256.66.0F.WIG 2B /r */
      /* VMOVNTPS ymm1, m256 = VEX.256.0F.WIG 2B /r */
      if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
          && 1==getVexL(pfx)/*256*/ && !epartIsReg(getUChar(delta))) {
         UChar  modrm = getUChar(delta);
         UInt   rS    = gregOfRexRM(pfx, modrm);
         IRTemp tS    = newTemp(Ity_V256);
         assign(tS, getYMMReg(rS));
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         delta += alen;
         gen_SEGV_if_not_32_aligned(addr);
         storeLE(mkexpr(addr), mkexpr(tS));
         DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
             nameYMMReg(rS), dis_buf);
         goto decode_success;
      }
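      /* The non-temporal hint in vmovntps/vmovntpd is not represented in
         IR; both are handled as ordinary stores, with the same alignment
         check (gen_SEGV_if_not_*_aligned) as vmovaps/vmovapd. */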
      /* VCVTTSD2SI xmm1/m32, r32 = VEX.LIG.F2.0F.W0 2C /r */
      if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
         delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
         goto decode_success;
      }
      /* VCVTTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2C /r */
      if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
         delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
         goto decode_success;
      }
      /* VCVTTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2C /r */
      if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
         delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
         goto decode_success;
      }
      /* VCVTTSS2SI xmm1/m64, r64 = VEX.LIG.F3.0F.W1 2C /r */
      if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
         delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
         goto decode_success;
      }
      /* VCVTSD2SI xmm1/m32, r32 = VEX.LIG.F2.0F.W0 2D /r */
      if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
         delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
         goto decode_success;
      }
      /* VCVTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2D /r */
      if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
         delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
         goto decode_success;
      }
      /* VCVTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2D /r */
      if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
         delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
         goto decode_success;
      }
      /* VCVTSS2SI xmm1/m64, r64 = VEX.LIG.F3.0F.W1 2D /r */
      if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
         delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
         goto decode_success;
      }
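      /* Both the 2C (truncating) and 2D (current-rounding-mode) forms are
         routed through dis_CVTxSD2SI/dis_CVTxSS2SI; the opc byte is
         passed down so the helper can distinguish the two. */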
      /* VUCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2E /r */
      /* VCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2F /r */
      if (have66noF2noF3(pfx)) {
         delta = dis_COMISD( vbi, pfx, delta, True/*isAvx*/, opc );
         goto decode_success;
      }
      /* VUCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2E /r */
      /* VCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2F /r */
      if (haveNo66noF2noF3(pfx)) {
         delta = dis_COMISS( vbi, pfx, delta, True/*isAvx*/, opc );
         goto decode_success;
      }
      /* VMOVMSKPD xmm2, r32 = VEX.128.66.0F.WIG 50 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_MOVMSKPD_128( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VMOVMSKPD ymm2, r32 = VEX.256.66.0F.WIG 50 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_MOVMSKPD_256( vbi, pfx, delta );
         goto decode_success;
      }
      /* VMOVMSKPS xmm2, r32 = VEX.128.0F.WIG 50 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_MOVMSKPS_128( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VMOVMSKPS ymm2, r32 = VEX.256.0F.WIG 50 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_MOVMSKPS_256( vbi, pfx, delta );
         goto decode_success;
      }
      /* VSQRTSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 51 /r */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32_unary(
                    uses_vvvv, vbi, pfx, delta, "vsqrtss", Iop_Sqrt32F0x4 );
         goto decode_success;
      }
      /* VSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 51 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx4 );
         goto decode_success;
      }
      /* VSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 51 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx8 );
         goto decode_success;
      }
      /* VSQRTSD xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F2.0F.WIG 51 /r */
      if (haveF2no66noF3(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo64_unary(
                    uses_vvvv, vbi, pfx, delta, "vsqrtsd", Iop_Sqrt64F0x2 );
         goto decode_success;
      }
      /* VSQRTPD xmm2/m128(E), xmm1(G) = VEX.NDS.128.66.0F.WIG 51 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx2 );
         goto decode_success;
      }
      /* VSQRTPD ymm2/m256(E), ymm1(G) = VEX.NDS.256.66.0F.WIG 51 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx4 );
         goto decode_success;
      }
      /* VRSQRTSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 52 /r */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32_unary(
                    uses_vvvv, vbi, pfx, delta, "vrsqrtss",
                    Iop_RSqrtEst32F0x4 );
         goto decode_success;
      }
      /* VRSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 52 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx4 );
         goto decode_success;
      }
      /* VRSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 52 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx8 );
         goto decode_success;
      }
      /* VRCPSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 53 /r */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32_unary(
                    uses_vvvv, vbi, pfx, delta, "vrcpss", Iop_RecipEst32F0x4 );
         goto decode_success;
      }
      /* VRCPPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 53 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx4 );
         goto decode_success;
      }
      /* VRCPPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 53 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_to_G_unary_all(
                    uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx8 );
         goto decode_success;
      }
      /* VANDPD r/m, rV, r ::: r = rV & r/m */
      /* VANDPD = VEX.NDS.128.66.0F.WIG 54 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128 );
         goto decode_success;
      }
      /* VANDPD r/m, rV, r ::: r = rV & r/m */
      /* VANDPD = VEX.NDS.256.66.0F.WIG 54 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256 );
         goto decode_success;
      }
      /* VANDPS = VEX.NDS.128.0F.WIG 54 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128 );
         goto decode_success;
      }
      /* VANDPS = VEX.NDS.256.0F.WIG 54 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256 );
         goto decode_success;
      }
      /* VANDNPD r/m, rV, r ::: r = (not rV) & r/m */
      /* VANDNPD = VEX.NDS.128.66.0F.WIG 55 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128,
                    NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
         goto decode_success;
      }
      /* VANDNPD = VEX.NDS.256.66.0F.WIG 55 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256,
                    NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
         goto decode_success;
      }
      /* VANDNPS = VEX.NDS.128.0F.WIG 55 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128,
                    NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
         goto decode_success;
      }
      /* VANDNPS = VEX.NDS.256.0F.WIG 55 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256,
                    NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
         goto decode_success;
      }
      /* VORPD r/m, rV, r ::: r = rV | r/m */
      /* VORPD = VEX.NDS.128.66.0F.WIG 56 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV128 );
         goto decode_success;
      }
      /* VORPD r/m, rV, r ::: r = rV | r/m */
      /* VORPD = VEX.NDS.256.66.0F.WIG 56 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV256 );
         goto decode_success;
      }
      /* VORPS r/m, rV, r ::: r = rV | r/m */
      /* VORPS = VEX.NDS.128.0F.WIG 56 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV128 );
         goto decode_success;
      }
      /* VORPS r/m, rV, r ::: r = rV | r/m */
      /* VORPS = VEX.NDS.256.0F.WIG 56 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV256 );
         goto decode_success;
      }
      /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
      /* VXORPD = VEX.NDS.128.66.0F.WIG 57 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV128 );
         goto decode_success;
      }
      /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
      /* VXORPD = VEX.NDS.256.66.0F.WIG 57 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV256 );
         goto decode_success;
      }
      /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
      /* VXORPS = VEX.NDS.128.0F.WIG 57 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV128 );
         goto decode_success;
      }
      /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
      /* VXORPS = VEX.NDS.256.0F.WIG 57 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV256 );
         goto decode_success;
      }
      /* VADDSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 58 /r */
      if (haveF2no66noF3(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo64(
                    uses_vvvv, vbi, pfx, delta, "vaddsd", Iop_Add64F0x2 );
         goto decode_success;
      }
      /* VADDSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 58 /r */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32(
                    uses_vvvv, vbi, pfx, delta, "vaddss", Iop_Add32F0x4 );
         goto decode_success;
      }
      /* VADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 58 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx4 );
         goto decode_success;
      }
      /* VADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 58 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx8 );
         goto decode_success;
      }
      /* VADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 58 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx2 );
         goto decode_success;
      }
      /* VADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 58 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx4 );
         goto decode_success;
      }
      /* VMULSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 59 /r */
      if (haveF2no66noF3(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo64(
                    uses_vvvv, vbi, pfx, delta, "vmulsd", Iop_Mul64F0x2 );
         goto decode_success;
      }
      /* VMULSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 59 /r */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32(
                    uses_vvvv, vbi, pfx, delta, "vmulss", Iop_Mul32F0x4 );
         goto decode_success;
      }
      /* VMULPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 59 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx4 );
         goto decode_success;
      }
      /* VMULPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 59 /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx8 );
         goto decode_success;
      }
      /* VMULPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 59 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx2 );
         goto decode_success;
      }
      /* VMULPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 59 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx4 );
         goto decode_success;
      }
      /* VCVTPS2PD xmm2/m64, xmm1 = VEX.128.0F.WIG 5A /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_CVTPS2PD_128( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VCVTPS2PD xmm2/m128, ymm1 = VEX.256.0F.WIG 5A /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_CVTPS2PD_256( vbi, pfx, delta );
         goto decode_success;
      }
      /* VCVTPD2PS xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5A /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_CVTPD2PS_128( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VCVTPD2PS ymm2/m256, xmm1 = VEX.256.66.0F.WIG 5A /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_CVTPD2PS_256( vbi, pfx, delta );
         goto decode_success;
      }
      /* VCVTSD2SS xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5A /r */
      if (haveF2no66noF3(pfx)) {
         UChar  modrm = getUChar(delta);
         UInt   rV    = getVexNvvvv(pfx);
         UInt   rD    = gregOfRexRM(pfx, modrm);
         IRTemp f64lo = newTemp(Ity_F64);
         IRTemp rmode = newTemp(Ity_I32);
         assign( rmode, get_sse_roundingmode() );
         if (epartIsReg(modrm)) {
            UInt rS = eregOfRexRM(pfx,modrm);
            assign(f64lo, getXMMRegLane64F(rS, 0));
            delta += 1;
            DIP("vcvtsd2ss %s,%s,%s\n",
                nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign(f64lo, loadLE(Ity_F64, mkexpr(addr)) );
            delta += alen;
            DIP("vcvtsd2ss %s,%s,%s\n",
                dis_buf, nameXMMReg(rV), nameXMMReg(rD));
         }
         putXMMRegLane32F( rD, 0,
                           binop( Iop_F64toF32, mkexpr(rmode),
                                  mkexpr(f64lo)) );
         putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
         putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
         putYMMRegLane128( rD, 1, mkV128(0) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VCVTSS2SD xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5A /r */
      if (haveF3no66noF2(pfx)) {
         UChar  modrm = getUChar(delta);
         UInt   rV    = getVexNvvvv(pfx);
         UInt   rD    = gregOfRexRM(pfx, modrm);
         IRTemp f32lo = newTemp(Ity_F32);
         if (epartIsReg(modrm)) {
            UInt rS = eregOfRexRM(pfx,modrm);
            assign(f32lo, getXMMRegLane32F(rS, 0));
            delta += 1;
            DIP("vcvtss2sd %s,%s,%s\n",
                nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign(f32lo, loadLE(Ity_F32, mkexpr(addr)) );
            delta += alen;
            DIP("vcvtss2sd %s,%s,%s\n",
                dis_buf, nameXMMReg(rV), nameXMMReg(rD));
         }
         putXMMRegLane64F( rD, 0,
                           unop( Iop_F32toF64, mkexpr(f32lo)) );
         putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
         putYMMRegLane128( rD, 1, mkV128(0) );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VCVTPS2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5B /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
                                    True/*isAvx*/, False/*!r2zero*/ );
         goto decode_success;
      }
      /* VCVTPS2DQ ymm2/m256, ymm1 = VEX.256.66.0F.WIG 5B /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
                                    False/*!r2zero*/ );
         goto decode_success;
      }
      /* VCVTTPS2DQ xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 5B /r */
      if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
                                    True/*isAvx*/, True/*r2zero*/ );
         goto decode_success;
      }
      /* VCVTTPS2DQ ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 5B /r */
      if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
                                    True/*r2zero*/ );
         goto decode_success;
      }
      /* VCVTDQ2PS xmm2/m128, xmm1 = VEX.128.0F.WIG 5B /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_CVTDQ2PS_128 ( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VCVTDQ2PS ymm2/m256, ymm1 = VEX.256.0F.WIG 5B /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_CVTDQ2PS_256 ( vbi, pfx, delta );
         goto decode_success;
      }
      /* VSUBSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5C /r */
      if (haveF2no66noF3(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo64(
                    uses_vvvv, vbi, pfx, delta, "vsubsd", Iop_Sub64F0x2 );
         goto decode_success;
      }
      /* VSUBSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5C /r */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32(
                    uses_vvvv, vbi, pfx, delta, "vsubss", Iop_Sub32F0x4 );
         goto decode_success;
      }
      /* VSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5C /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx4 );
         goto decode_success;
      }
      /* VSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5C /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx8 );
         goto decode_success;
      }
      /* VSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5C /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx2 );
         goto decode_success;
      }
      /* VSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5C /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx4 );
         goto decode_success;
      }
      /* VMINSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5D /r */
      if (haveF2no66noF3(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo64(
                    uses_vvvv, vbi, pfx, delta, "vminsd", Iop_Min64F0x2 );
         goto decode_success;
      }
      /* VMINSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5D /r */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32(
                    uses_vvvv, vbi, pfx, delta, "vminss", Iop_Min32F0x4 );
         goto decode_success;
      }
      /* VMINPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5D /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx4 );
         goto decode_success;
      }
      /* VMINPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5D /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx8 );
         goto decode_success;
      }
      /* VMINPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5D /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx2 );
         goto decode_success;
      }
      /* VMINPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5D /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx4 );
         goto decode_success;
      }
      /* VDIVSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5E /r */
      if (haveF2no66noF3(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo64(
                    uses_vvvv, vbi, pfx, delta, "vdivsd", Iop_Div64F0x2 );
         goto decode_success;
      }
      /* VDIVSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5E /r */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32(
                    uses_vvvv, vbi, pfx, delta, "vdivss", Iop_Div32F0x4 );
         goto decode_success;
      }
      /* VDIVPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5E /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx4 );
         goto decode_success;
      }
      /* VDIVPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5E /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx8 );
         goto decode_success;
      }
      /* VDIVPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5E /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx2 );
         goto decode_success;
      }
      /* VDIVPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5E /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx4 );
         goto decode_success;
      }
      /* VMAXSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5F /r */
      if (haveF2no66noF3(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo64(
                    uses_vvvv, vbi, pfx, delta, "vmaxsd", Iop_Max64F0x2 );
         goto decode_success;
      }
      /* VMAXSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5F /r */
      if (haveF3no66noF2(pfx)) {
         delta = dis_AVX128_E_V_to_G_lo32(
                    uses_vvvv, vbi, pfx, delta, "vmaxss", Iop_Max32F0x4 );
         goto decode_success;
      }
      /* VMAXPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5F /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx4 );
         goto decode_success;
      }
      /* VMAXPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5F /r */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx8 );
         goto decode_success;
      }
      /* VMAXPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5F /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx2 );
         goto decode_success;
      }
      /* VMAXPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5F /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx4 );
         goto decode_success;
      }
      /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
      /* VPUNPCKLBW = VEX.NDS.128.66.0F.WIG 60 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
                    Iop_InterleaveLO8x16, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
      /* VPUNPCKLBW = VEX.NDS.256.66.0F.WIG 60 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
                    math_VPUNPCKLBW_YMM );
         goto decode_success;
      }
      /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
      /* VPUNPCKLWD = VEX.NDS.128.66.0F.WIG 61 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
                    Iop_InterleaveLO16x8, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
      /* VPUNPCKLWD = VEX.NDS.256.66.0F.WIG 61 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
                    math_VPUNPCKLWD_YMM );
         goto decode_success;
      }
      /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
      /* VPUNPCKLDQ = VEX.NDS.128.66.0F.WIG 62 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpunpckldq",
                    Iop_InterleaveLO32x4, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
      /* VPUNPCKLDQ = VEX.NDS.256.66.0F.WIG 62 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpunpckldq",
                    math_VPUNPCKLDQ_YMM );
         goto decode_success;
      }
      /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
      /* VPACKSSWB = VEX.NDS.128.66.0F.WIG 63 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpacksswb",
                    Iop_QNarrowBin16Sto8Sx16, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
      /* VPACKSSWB = VEX.NDS.256.66.0F.WIG 63 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpacksswb",
                    math_VPACKSSWB_YMM );
         goto decode_success;
      }
      /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
      /* VPCMPGTB = VEX.NDS.128.66.0F.WIG 64 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx16 );
         goto decode_success;
      }
      /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
      /* VPCMPGTB = VEX.NDS.256.66.0F.WIG 64 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx32 );
         goto decode_success;
      }
      /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
      /* VPCMPGTW = VEX.NDS.128.66.0F.WIG 65 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx8 );
         goto decode_success;
      }
      /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
      /* VPCMPGTW = VEX.NDS.256.66.0F.WIG 65 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx16 );
         goto decode_success;
      }
      /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
      /* VPCMPGTD = VEX.NDS.128.66.0F.WIG 66 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx4 );
         goto decode_success;
      }
      /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
      /* VPCMPGTD = VEX.NDS.256.66.0F.WIG 66 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx8 );
         goto decode_success;
      }
      /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
      /* VPACKUSWB = VEX.NDS.128.66.0F.WIG 67 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpackuswb",
                    Iop_QNarrowBin16Sto8Ux16, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
      /* VPACKUSWB = VEX.NDS.256.66.0F.WIG 67 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpackuswb",
                    math_VPACKUSWB_YMM );
         goto decode_success;
      }
      /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
      /* VPUNPCKHBW = VEX.NDS.128.0F.WIG 68 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
                    Iop_InterleaveHI8x16, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
      /* VPUNPCKHBW = VEX.NDS.256.0F.WIG 68 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
                    math_VPUNPCKHBW_YMM );
         goto decode_success;
      }
      /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
      /* VPUNPCKHWD = VEX.NDS.128.0F.WIG 69 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
                    Iop_InterleaveHI16x8, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
      /* VPUNPCKHWD = VEX.NDS.256.0F.WIG 69 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
                    math_VPUNPCKHWD_YMM );
         goto decode_success;
      }
      /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
      /* VPUNPCKHDQ = VEX.NDS.128.66.0F.WIG 6A /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
                    Iop_InterleaveHI32x4, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
      /* VPUNPCKHDQ = VEX.NDS.256.66.0F.WIG 6A /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
                    math_VPUNPCKHDQ_YMM );
         goto decode_success;
      }
      /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
      /* VPACKSSDW = VEX.NDS.128.66.0F.WIG 6B /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpackssdw",
                    Iop_QNarrowBin32Sto16Sx8, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
      /* VPACKSSDW = VEX.NDS.256.66.0F.WIG 6B /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpackssdw",
                    math_VPACKSSDW_YMM );
         goto decode_success;
      }
      /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
      /* VPUNPCKLQDQ = VEX.NDS.128.0F.WIG 6C /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpunpcklqdq",
                    Iop_InterleaveLO64x2, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
      /* VPUNPCKLQDQ = VEX.NDS.256.0F.WIG 6C /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpunpcklqdq",
                    math_VPUNPCKLQDQ_YMM );
         goto decode_success;
      }
      /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
      /* VPUNPCKHQDQ = VEX.NDS.128.0F.WIG 6D /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpunpckhqdq",
                    Iop_InterleaveHI64x2, NULL,
                    False/*!invertLeftArg*/, True/*swapArgs*/ );
         goto decode_success;
      }
      /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
      /* VPUNPCKHQDQ = VEX.NDS.256.0F.WIG 6D /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpunpckhqdq",
                    math_VPUNPCKHQDQ_YMM );
         goto decode_success;
      }
      /* VMOVD r32/m32, xmm1 = VEX.128.66.0F.W0 6E */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
         vassert(sz == 2); /* even tho we are transferring 4, not 2. */
         UChar modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta += 1;
            putYMMRegLoAndZU(
               gregOfRexRM(pfx,modrm),
               unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
            );
            DIP("vmovd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            putYMMRegLoAndZU(
               gregOfRexRM(pfx,modrm),
               unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)))
            );
            DIP("vmovd %s, %s\n", dis_buf,
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
         }
         goto decode_success;
      }
      /* VMOVQ r64/m64, xmm1 = VEX.128.66.0F.W1 6E */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
         vassert(sz == 2); /* even tho we are transferring 8, not 2. */
         UChar modrm = getUChar(delta);
         if (epartIsReg(modrm)) {
            delta += 1;
            putYMMRegLoAndZU(
               gregOfRexRM(pfx,modrm),
               unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
            );
            DIP("vmovq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            putYMMRegLoAndZU(
               gregOfRexRM(pfx,modrm),
               unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)))
            );
            DIP("vmovq %s, %s\n", dis_buf,
                                  nameXMMReg(gregOfRexRM(pfx,modrm)));
         }
         goto decode_success;
      }
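      /* For both vmovd and vmovq above, Iop_32UtoV128/Iop_64UtoV128
         zero the remaining lanes of the 128-bit value, and
         putYMMRegLoAndZU then zeroes bits 255:128, giving the full
         zero-extension the VEX encoding mandates. */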
      /* VMOVDQA ymm2/m256, ymm1 = VEX.256.66.0F.WIG 6F */
      /* VMOVDQU ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 6F */
      if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
          && 1==getVexL(pfx)/*256*/) {
         UChar  modrm = getUChar(delta);
         UInt   rD    = gregOfRexRM(pfx, modrm);
         IRTemp tD    = newTemp(Ity_V256);
         Bool   isA   = have66noF2noF3(pfx);
         HChar  ch    = isA ? 'a' : 'u';
         if (epartIsReg(modrm)) {
            UInt rS = eregOfRexRM(pfx, modrm);
            delta += 1;
            assign(tD, getYMMReg(rS));
            DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            if (isA)
               gen_SEGV_if_not_32_aligned(addr);
            assign(tD, loadLE(Ity_V256, mkexpr(addr)));
            DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameYMMReg(rD));
         }
         putYMMReg(rD, mkexpr(tD));
         goto decode_success;
      }
      /* VMOVDQA xmm2/m128, xmm1 = VEX.128.66.0F.WIG 6F */
      /* VMOVDQU xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 6F */
      if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
          && 0==getVexL(pfx)/*128*/) {
         UChar  modrm = getUChar(delta);
         UInt   rD    = gregOfRexRM(pfx, modrm);
         IRTemp tD    = newTemp(Ity_V128);
         Bool   isA   = have66noF2noF3(pfx);
         HChar  ch    = isA ? 'a' : 'u';
         if (epartIsReg(modrm)) {
            UInt rS = eregOfRexRM(pfx, modrm);
            delta += 1;
            assign(tD, getXMMReg(rS));
            DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            if (isA)
               gen_SEGV_if_not_16_aligned(addr);
            assign(tD, loadLE(Ity_V128, mkexpr(addr)));
            DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameXMMReg(rD));
         }
         putYMMRegLoAndZU(rD, mkexpr(tD));
         goto decode_success;
      }
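      /* Only the 66-prefixed form (vmovdqa) is alignment-checked in the
         two cases above; the F3 form (vmovdqu) allows unaligned
         addresses, hence the isA guard on gen_SEGV_if_not_*_aligned. */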
      /* VPSHUFD imm8, xmm2/m128, xmm1 = VEX.128.66.0F.WIG 70 /r ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PSHUFD_32x4( vbi, pfx, delta, True/*writesYmm*/);
         goto decode_success;
      }
      /* VPSHUFD imm8, ymm2/m256, ymm1 = VEX.256.66.0F.WIG 70 /r ib */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PSHUFD_32x8( vbi, pfx, delta );
         goto decode_success;
      }
      /* VPSHUFLW imm8, xmm2/m128, xmm1 = VEX.128.F2.0F.WIG 70 /r ib */
      if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PSHUFxW_128( vbi, pfx, delta,
                                  True/*isAvx*/, False/*!xIsH*/ );
         goto decode_success;
      }
      /* VPSHUFLW imm8, ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 70 /r ib */
      if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PSHUFxW_256( vbi, pfx, delta, False/*!xIsH*/ );
         goto decode_success;
      }
      /* VPSHUFHW imm8, xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 70 /r ib */
      if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PSHUFxW_128( vbi, pfx, delta,
                                  True/*isAvx*/, True/*xIsH*/ );
         goto decode_success;
      }
      /* VPSHUFHW imm8, ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 70 /r ib */
      if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PSHUFxW_256( vbi, pfx, delta, True/*xIsH*/ );
         goto decode_success;
      }
      /* VPSRLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /2 ib */
      /* VPSRAW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /4 ib */
      /* VPSLLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /6 ib */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/
          && epartIsReg(getUChar(delta))) {
         if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
            delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
                                                "vpsrlw", Iop_ShrN16x8 );
            *uses_vvvv = True;
            goto decode_success;
         }
         if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
            delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
                                                "vpsraw", Iop_SarN16x8 );
            *uses_vvvv = True;
            goto decode_success;
         }
         if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
            delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
                                                "vpsllw", Iop_ShlN16x8 );
            *uses_vvvv = True;
            goto decode_success;
         }
         /* else fall through */
      }
      /* VPSRLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /2 ib */
      /* VPSRAW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /4 ib */
      /* VPSLLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /6 ib */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/
          && epartIsReg(getUChar(delta))) {
         if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
            delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
                                                "vpsrlw", Iop_ShrN16x16 );
            *uses_vvvv = True;
            goto decode_success;
         }
         if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
            delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
                                                "vpsraw", Iop_SarN16x16 );
            *uses_vvvv = True;
            goto decode_success;
         }
         if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
            delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
                                                "vpsllw", Iop_ShlN16x16 );
            *uses_vvvv = True;
            goto decode_success;
         }
         /* else fall through */
      }
      /* VPSRLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /2 ib */
      /* VPSRAD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /4 ib */
      /* VPSLLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /6 ib */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/
          && epartIsReg(getUChar(delta))) {
         if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
            delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
                                                "vpsrld", Iop_ShrN32x4 );
            goto decode_success;
         }
         if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
            delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
                                                "vpsrad", Iop_SarN32x4 );
            goto decode_success;
         }
         if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
            delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
                                                "vpslld", Iop_ShlN32x4 );
            goto decode_success;
         }
         /* else fall through */
      }
      /* VPSRLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /2 ib */
      /* VPSRAD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /4 ib */
      /* VPSLLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /6 ib */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/
          && epartIsReg(getUChar(delta))) {
         if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
            delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
                                                "vpsrld", Iop_ShrN32x8 );
            goto decode_success;
         }
         if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
            delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
                                                "vpsrad", Iop_SarN32x8 );
            goto decode_success;
         }
         if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
            delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
                                                "vpslld", Iop_ShlN32x8 );
            goto decode_success;
         }
         /* else fall through */
      }
      /* VPSRLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /3 ib */
      /* VPSLLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /7 ib */
      /* VPSRLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /2 ib */
      /* VPSLLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /6 ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && epartIsReg(getUChar(delta))) {
         Int    rS   = eregOfRexRM(pfx,getUChar(delta));
         Int    rD   = getVexNvvvv(pfx);
         IRTemp vecS = newTemp(Ity_V128);
         if (gregLO3ofRM(getUChar(delta)) == 3) {
            Int imm = (Int)getUChar(delta+1);
            DIP("vpsrldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
            assign( vecS, getXMMReg(rS) );
            putYMMRegLoAndZU(rD, mkexpr(math_PSRLDQ( vecS, imm )));
            goto decode_success;
         }
         if (gregLO3ofRM(getUChar(delta)) == 7) {
            Int imm = (Int)getUChar(delta+1);
            DIP("vpslldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
            assign( vecS, getXMMReg(rS) );
            putYMMRegLoAndZU(rD, mkexpr(math_PSLLDQ( vecS, imm )));
            goto decode_success;
         }
         if (gregLO3ofRM(getUChar(delta)) == 2) {
            delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
                                                "vpsrlq", Iop_ShrN64x2 );
            goto decode_success;
         }
         if (gregLO3ofRM(getUChar(delta)) == 6) {
            delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
                                                "vpsllq", Iop_ShlN64x2 );
            goto decode_success;
         }
         /* else fall through */
      }
      /* VPSRLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /3 ib */
      /* VPSLLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /7 ib */
      /* VPSRLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /2 ib */
      /* VPSLLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /6 ib */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && epartIsReg(getUChar(delta))) {
         Int rS = eregOfRexRM(pfx,getUChar(delta));
         Int rD = getVexNvvvv(pfx);
         if (gregLO3ofRM(getUChar(delta)) == 3) {
            IRTemp vecS0 = newTemp(Ity_V128);
            IRTemp vecS1 = newTemp(Ity_V128);
            Int imm = (Int)getUChar(delta+1);
            DIP("vpsrldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD));
            assign( vecS0, getYMMRegLane128(rS, 0));
            assign( vecS1, getYMMRegLane128(rS, 1));
            putYMMRegLane128(rD, 0, mkexpr(math_PSRLDQ( vecS0, imm )));
            putYMMRegLane128(rD, 1, mkexpr(math_PSRLDQ( vecS1, imm )));
            goto decode_success;
         }
         if (gregLO3ofRM(getUChar(delta)) == 7) {
            IRTemp vecS0 = newTemp(Ity_V128);
            IRTemp vecS1 = newTemp(Ity_V128);
            Int imm = (Int)getUChar(delta+1);
            DIP("vpslldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD));
            assign( vecS0, getYMMRegLane128(rS, 0));
            assign( vecS1, getYMMRegLane128(rS, 1));
            putYMMRegLane128(rD, 0, mkexpr(math_PSLLDQ( vecS0, imm )));
            putYMMRegLane128(rD, 1, mkexpr(math_PSLLDQ( vecS1, imm )));
            goto decode_success;
         }
         if (gregLO3ofRM(getUChar(delta)) == 2) {
            delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
                                                "vpsrlq", Iop_ShrN64x4 );
            goto decode_success;
         }
         if (gregLO3ofRM(getUChar(delta)) == 6) {
            delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
                                                "vpsllq", Iop_ShlN64x4 );
            goto decode_success;
         }
         /* else fall through */
      }
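      /* Note for the 256-bit VPSRLDQ/VPSLLDQ cases above: the byte-granular
         shift is applied to each 128-bit lane independently (math_PSRLDQ /
         math_PSLLDQ on lanes 0 and 1 separately), which matches the
         per-lane semantics of these AVX2 instructions. */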
      /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
      /* VPCMPEQB = VEX.NDS.128.66.0F.WIG 74 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x16 );
         goto decode_success;
      }
      /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
      /* VPCMPEQB = VEX.NDS.256.66.0F.WIG 74 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x32 );
         goto decode_success;
      }
      /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
      /* VPCMPEQW = VEX.NDS.128.66.0F.WIG 75 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x8 );
         goto decode_success;
      }
      /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
      /* VPCMPEQW = VEX.NDS.256.66.0F.WIG 75 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x16 );
         goto decode_success;
      }
      /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
      /* VPCMPEQD = VEX.NDS.128.66.0F.WIG 76 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x4 );
         goto decode_success;
      }
      /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
      /* VPCMPEQD = VEX.NDS.256.66.0F.WIG 76 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x8 );
         goto decode_success;
      }
      /* VZEROUPPER = VEX.128.0F.WIG 77 */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         Int i;
         IRTemp zero128 = newTemp(Ity_V128);
         assign(zero128, mkV128(0));
         for (i = 0; i < 16; i++) {
            putYMMRegLane128(i, 1, mkexpr(zero128));
         }
         DIP("vzeroupper\n");
         goto decode_success;
      }
      /* VZEROALL = VEX.256.0F.WIG 77 */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         Int i;
         IRTemp zero128 = newTemp(Ity_V128);
         assign(zero128, mkV128(0));
         for (i = 0; i < 16; i++) {
            putYMMRegLoAndZU(i, mkexpr(zero128));
         }
         DIP("vzeroall\n");
         goto decode_success;
      }
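      /* The two loops above walk all 16 YMM registers.  VZEROUPPER clears
         only lane 1 (bits 255:128) of each register, whereas VZEROALL
         writes the zero vector through putYMMRegLoAndZU, which also zeroes
         the upper lane, so the whole register ends up zero. */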
      /* VHADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7C /r */
      /* VHSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7D /r */
      if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         IRTemp sV    = newTemp(Ity_V128);
         IRTemp dV    = newTemp(Ity_V128);
         Bool   isAdd = opc == 0x7C;
         const HChar* str = isAdd ? "add" : "sub";
         UChar modrm  = getUChar(delta);
         UInt  rG     = gregOfRexRM(pfx,modrm);
         UInt  rV     = getVexNvvvv(pfx);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE),
                nameXMMReg(rV), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            DIP("vh%spd %s,%s,%s\n", str, dis_buf,
                nameXMMReg(rV), nameXMMReg(rG));
         }
         assign( dV, getXMMReg(rV) );
         putYMMRegLoAndZU( rG, mkexpr( math_HADDPS_128 ( dV, sV, isAdd ) ) );
         goto decode_success;
      }
      /* VHADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7C /r */
      /* VHSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7D /r */
      if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         IRTemp sV    = newTemp(Ity_V256);
         IRTemp dV    = newTemp(Ity_V256);
         IRTemp s1, s0, d1, d0;
         Bool   isAdd = opc == 0x7C;
         const HChar* str = isAdd ? "add" : "sub";
         UChar modrm  = getUChar(delta);
         UInt  rG     = gregOfRexRM(pfx,modrm);
         UInt  rV     = getVexNvvvv(pfx);
         s1 = s0 = d1 = d0 = IRTemp_INVALID;
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getYMMReg(rE) );
            DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE),
                nameYMMReg(rV), nameYMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
            DIP("vh%spd %s,%s,%s\n", str, dis_buf,
                nameYMMReg(rV), nameYMMReg(rG));
         }
         assign( dV, getYMMReg(rV) );
         breakupV256toV128s( dV, &d1, &d0 );
         breakupV256toV128s( sV, &s1, &s0 );
         putYMMReg( rG, binop(Iop_V128HLtoV256,
                              mkexpr( math_HADDPS_128 ( d1, s1, isAdd ) ),
                              mkexpr( math_HADDPS_128 ( d0, s0, isAdd ) ) ) );
         goto decode_success;
      }
      /* VHADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7C /r */
      /* VHSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7D /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         IRTemp sV    = newTemp(Ity_V128);
         IRTemp dV    = newTemp(Ity_V128);
         Bool   isAdd = opc == 0x7C;
         const HChar* str = isAdd ? "add" : "sub";
         UChar modrm  = getUChar(delta);
         UInt  rG     = gregOfRexRM(pfx,modrm);
         UInt  rV     = getVexNvvvv(pfx);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE),
                nameXMMReg(rV), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            DIP("vh%spd %s,%s,%s\n", str, dis_buf,
                nameXMMReg(rV), nameXMMReg(rG));
         }
         assign( dV, getXMMReg(rV) );
         putYMMRegLoAndZU( rG, mkexpr( math_HADDPD_128 ( dV, sV, isAdd ) ) );
         goto decode_success;
      }
      /* VHADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7C /r */
      /* VHSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7D /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         IRTemp sV    = newTemp(Ity_V256);
         IRTemp dV    = newTemp(Ity_V256);
         IRTemp s1, s0, d1, d0;
         Bool   isAdd = opc == 0x7C;
         const HChar* str = isAdd ? "add" : "sub";
         UChar modrm  = getUChar(delta);
         UInt  rG     = gregOfRexRM(pfx,modrm);
         UInt  rV     = getVexNvvvv(pfx);
         s1 = s0 = d1 = d0 = IRTemp_INVALID;
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getYMMReg(rE) );
            DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE),
                nameYMMReg(rV), nameYMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
            DIP("vh%spd %s,%s,%s\n", str, dis_buf,
                nameYMMReg(rV), nameYMMReg(rG));
         }
         assign( dV, getYMMReg(rV) );
         breakupV256toV128s( dV, &d1, &d0 );
         breakupV256toV128s( sV, &s1, &s0 );
         putYMMReg( rG, binop(Iop_V128HLtoV256,
                              mkexpr( math_HADDPD_128 ( d1, s1, isAdd ) ),
                              mkexpr( math_HADDPD_128 ( d0, s0, isAdd ) ) ) );
         goto decode_success;
      }
      /* Note the Intel docs don't make sense for this.  I think they
         are wrong.  They seem to imply it is a store when in fact I
         think it is a load.  Also it's unclear whether this is W0, W1
         or WIG. */
      /* VMOVQ xmm2/m64, xmm1 = VEX.128.F3.0F.W0 7E /r */
      if (haveF3no66noF2(pfx)
          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
         vassert(sz == 4); /* even tho we are transferring 8, not 4. */
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            putXMMRegLane64( rG, 0, getXMMRegLane64( rE, 0 ));
            DIP("vmovq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
            DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
         }
         /* zero bits 255:64 */
         putXMMRegLane64( rG, 1, mkU64(0) );
         putYMMRegLane128( rG, 1, mkV128(0) );
         goto decode_success;
      }
      /* VMOVQ xmm1, r64 = VEX.128.66.0F.W1 7E /r (reg case only) */
      /* Moves from G to E, so is a store-form insn */
      /* Intel docs list this in the VMOVD entry for some reason. */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            DIP("vmovq %s,%s\n", nameXMMReg(rG), nameIReg64(rE));
            putIReg64(rE, getXMMRegLane64(rG, 0));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr), getXMMRegLane64(rG, 0) );
            DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
         }
         goto decode_success;
      }
      /* VMOVD xmm1, m32/r32 = VEX.128.66.0F.W0 7E /r (reg case only) */
      /* Moves from G to E, so is a store-form insn */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            DIP("vmovd %s,%s\n", nameXMMReg(rG), nameIReg32(rE));
            putIReg32(rE, getXMMRegLane32(rG, 0));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr), getXMMRegLane32(rG, 0) );
            DIP("vmovd %s,%s\n", dis_buf, nameXMMReg(rG));
         }
         goto decode_success;
      }
      /* VMOVDQA ymm1, ymm2/m256 = VEX.256.66.0F.WIG 7F */
      /* VMOVDQU ymm1, ymm2/m256 = VEX.256.F3.0F.WIG 7F */
      if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
          && 1==getVexL(pfx)/*256*/) {
         UChar  modrm = getUChar(delta);
         UInt   rS    = gregOfRexRM(pfx, modrm);
         IRTemp tS    = newTemp(Ity_V256);
         Bool   isA   = have66noF2noF3(pfx);
         HChar  ch    = isA ? 'a' : 'u';
         assign(tS, getYMMReg(rS));
         if (epartIsReg(modrm)) {
            UInt rD = eregOfRexRM(pfx, modrm);
            putYMMReg(rD, mkexpr(tS));
            DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_32_aligned(addr);
            storeLE(mkexpr(addr), mkexpr(tS));
            DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), dis_buf);
         }
         goto decode_success;
      }
      /* VMOVDQA xmm1, xmm2/m128 = VEX.128.66.0F.WIG 7F */
      /* VMOVDQU xmm1, xmm2/m128 = VEX.128.F3.0F.WIG 7F */
      if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
          && 0==getVexL(pfx)/*128*/) {
         UChar  modrm = getUChar(delta);
         UInt   rS    = gregOfRexRM(pfx, modrm);
         IRTemp tS    = newTemp(Ity_V128);
         Bool   isA   = have66noF2noF3(pfx);
         HChar  ch    = isA ? 'a' : 'u';
         assign(tS, getXMMReg(rS));
         if (epartIsReg(modrm)) {
            UInt rD = eregOfRexRM(pfx, modrm);
            putYMMRegLoAndZU(rD, mkexpr(tS));
            DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned(addr);
            storeLE(mkexpr(addr), mkexpr(tS));
            DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), dis_buf);
         }
         goto decode_success;
      }
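      /* In the 7F stores above, the aligned (VMOVDQA) encodings require a
         16- or 32-byte aligned memory operand; the gen_SEGV_if_not_*_aligned
         calls model the fault the hardware raises on a misaligned access. */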
      /* VSTMXCSR m32 = VEX.LZ.0F.WIG AE /3 */
      if (haveNo66noF2noF3(pfx)
          && 0==getVexL(pfx)/*LZ*/
          && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
          && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
          && sz == 4) {
         delta = dis_STMXCSR(vbi, pfx, delta, True/*isAvx*/);
         goto decode_success;
      }
      /* VLDMXCSR m32 = VEX.LZ.0F.WIG AE /2 */
      if (haveNo66noF2noF3(pfx)
          && 0==getVexL(pfx)/*LZ*/
          && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
          && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
          && sz == 4) {
         delta = dis_LDMXCSR(vbi, pfx, delta, True/*isAvx*/);
         goto decode_success;
      }
      /* VCMPSD xmm3/m64(E=argL), xmm2(V=argR), xmm1(G) */
      /* = VEX.NDS.LIG.F2.0F.WIG C2 /r ib */
      if (haveF2no66noF3(pfx)) {
         Long delta0 = delta;
         delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
                                          "vcmpsd", False/*!all_lanes*/,
                                          8/*sz*/);
         if (delta > delta0) goto decode_success;
         /* else fall through -- decoding has failed */
      }
      /* VCMPSS xmm3/m32(E=argL), xmm2(V=argR), xmm1(G) */
      /* = VEX.NDS.LIG.F3.0F.WIG C2 /r ib */
      if (haveF3no66noF2(pfx)) {
         Long delta0 = delta;
         delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
                                          "vcmpss", False/*!all_lanes*/,
                                          4/*sz*/);
         if (delta > delta0) goto decode_success;
         /* else fall through -- decoding has failed */
      }
      /* VCMPPD xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
      /* = VEX.NDS.128.66.0F.WIG C2 /r ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         Long delta0 = delta;
         delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
                                          "vcmppd", True/*all_lanes*/,
                                          8/*sz*/);
         if (delta > delta0) goto decode_success;
         /* else fall through -- decoding has failed */
      }
      /* VCMPPD ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
      /* = VEX.NDS.256.66.0F.WIG C2 /r ib */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         Long delta0 = delta;
         delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
                                          "vcmppd", 8/*sz*/);
         if (delta > delta0) goto decode_success;
         /* else fall through -- decoding has failed */
      }
      /* VCMPPS xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
      /* = VEX.NDS.128.0F.WIG C2 /r ib */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         Long delta0 = delta;
         delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
                                          "vcmpps", True/*all_lanes*/,
                                          4/*sz*/);
         if (delta > delta0) goto decode_success;
         /* else fall through -- decoding has failed */
      }
      /* VCMPPS ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
      /* = VEX.NDS.256.0F.WIG C2 /r ib */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         Long delta0 = delta;
         delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
                                          "vcmpps", 4/*sz*/);
         if (delta > delta0) goto decode_success;
         /* else fall through -- decoding has failed */
      }
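      /* In the C2 cases above, the cmp helper leaves delta unchanged when it
         rejects the encoding (for example an unsupported imm8 condition
         code), so the delta > delta0 test is what decides between taking the
         decode_success path and falling through. */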
      /* VPINSRW r32/m16, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG C4 /r ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         Int    imm8;
         IRTemp new16 = newTemp(Ity_I16);

         if ( epartIsReg( modrm ) ) {
            imm8 = (Int)(getUChar(delta+1) & 7);
            assign( new16, unop(Iop_32to16,
                                getIReg32(eregOfRexRM(pfx,modrm))) );
            DIP( "vpinsrw $%d,%s,%s\n", imm8,
                 nameIReg32( eregOfRexRM(pfx, modrm) ), nameXMMReg(rG) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            imm8 = (Int)(getUChar(delta+alen) & 7);
            assign( new16, loadLE( Ity_I16, mkexpr(addr) ));
            DIP( "vpinsrw $%d,%s,%s\n",
                 imm8, dis_buf, nameXMMReg(rG) );
         }

         IRTemp src_vec = newTemp(Ity_V128);
         assign(src_vec, getXMMReg( rV ));
         IRTemp res_vec = math_PINSRW_128( src_vec, new16, imm8 );
         putYMMRegLoAndZU( rG, mkexpr(res_vec) );
         goto decode_success;
      }
      /* VPEXTRW imm8, xmm1, reg32 = VEX.128.66.0F.W0 C5 /r ib */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
         Long delta0 = delta;
         delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
                                              True/*isAvx*/ );
         if (delta > delta0) goto decode_success;
         /* else fall through -- decoding has failed */
      }
      /* VSHUFPS imm8, xmm3/m128, xmm2, xmm1, xmm2 */
      /* = VEX.NDS.128.0F.WIG C6 /r ib */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         Int    imm8 = 0;
         IRTemp eV   = newTemp(Ity_V128);
         IRTemp vV   = newTemp(Ity_V128);
         UInt  modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         UInt  rV    = getVexNvvvv(pfx);
         assign( vV, getXMMReg(rV) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( eV, getXMMReg(rE) );
            imm8 = (Int)getUChar(delta+1);
            DIP("vshufps $%d,%s,%s,%s\n",
                imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
            assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
            imm8 = (Int)getUChar(delta+alen);
            DIP("vshufps $%d,%s,%s,%s\n",
                imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
         }
         IRTemp res = math_SHUFPS_128( eV, vV, imm8 );
         putYMMRegLoAndZU( rG, mkexpr(res) );
         goto decode_success;
      }
      /* VSHUFPS imm8, ymm3/m256, ymm2, ymm1, ymm2 */
      /* = VEX.NDS.256.0F.WIG C6 /r ib */
      if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         Int    imm8 = 0;
         IRTemp eV   = newTemp(Ity_V256);
         IRTemp vV   = newTemp(Ity_V256);
         UInt  modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         UInt  rV    = getVexNvvvv(pfx);
         assign( vV, getYMMReg(rV) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( eV, getYMMReg(rE) );
            imm8 = (Int)getUChar(delta+1);
            DIP("vshufps $%d,%s,%s,%s\n",
                imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
            assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
            imm8 = (Int)getUChar(delta+alen);
            DIP("vshufps $%d,%s,%s,%s\n",
                imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
         }
         IRTemp res = math_SHUFPS_256( eV, vV, imm8 );
         putYMMReg( rG, mkexpr(res) );
         goto decode_success;
      }
      /* VSHUFPD imm8, xmm3/m128, xmm2, xmm1, xmm2 */
      /* = VEX.NDS.128.66.0F.WIG C6 /r ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         Int    imm8 = 0;
         IRTemp eV   = newTemp(Ity_V128);
         IRTemp vV   = newTemp(Ity_V128);
         UInt  modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         UInt  rV    = getVexNvvvv(pfx);
         assign( vV, getXMMReg(rV) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( eV, getXMMReg(rE) );
            imm8 = (Int)getUChar(delta+1);
            DIP("vshufpd $%d,%s,%s,%s\n",
                imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
            assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
            imm8 = (Int)getUChar(delta+alen);
            DIP("vshufpd $%d,%s,%s,%s\n",
                imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
         }
         IRTemp res = math_SHUFPD_128( eV, vV, imm8 );
         putYMMRegLoAndZU( rG, mkexpr(res) );
         goto decode_success;
      }
      /* VSHUFPD imm8, ymm3/m256, ymm2, ymm1, ymm2 */
      /* = VEX.NDS.256.66.0F.WIG C6 /r ib */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         Int    imm8 = 0;
         IRTemp eV   = newTemp(Ity_V256);
         IRTemp vV   = newTemp(Ity_V256);
         UInt  modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         UInt  rV    = getVexNvvvv(pfx);
         assign( vV, getYMMReg(rV) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( eV, getYMMReg(rE) );
            imm8 = (Int)getUChar(delta+1);
            DIP("vshufpd $%d,%s,%s,%s\n",
                imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
            assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
            imm8 = (Int)getUChar(delta+alen);
            DIP("vshufpd $%d,%s,%s,%s\n",
                imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
         }
         IRTemp res = math_SHUFPD_256( eV, vV, imm8 );
         putYMMReg( rG, mkexpr(res) );
         goto decode_success;
      }
      /* VADDSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D0 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta,
                    "vaddsubpd", math_ADDSUBPD_128 );
         goto decode_success;
      }
      /* VADDSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D0 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta,
                    "vaddsubpd", math_ADDSUBPD_256 );
         goto decode_success;
      }
      /* VADDSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG D0 /r */
      if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta,
                    "vaddsubps", math_ADDSUBPS_128 );
         goto decode_success;
      }
      /* VADDSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG D0 /r */
      if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta,
                    "vaddsubps", math_ADDSUBPS_256 );
         goto decode_success;
      }
      /* VPSRLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D1 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
                                        "vpsrlw", Iop_ShrN16x8 );
         goto decode_success;
      }
      /* VPSRLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D1 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
                                        "vpsrlw", Iop_ShrN16x16 );
         goto decode_success;
      }
      /* VPSRLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D2 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
                                        "vpsrld", Iop_ShrN32x4 );
         goto decode_success;
      }
      /* VPSRLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D2 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
                                        "vpsrld", Iop_ShrN32x8 );
         goto decode_success;
      }
      /* VPSRLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D3 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
                                        "vpsrlq", Iop_ShrN64x2 );
         goto decode_success;
      }
      /* VPSRLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D3 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
                                        "vpsrlq", Iop_ShrN64x4 );
         goto decode_success;
      }
      /* VPADDQ r/m, rV, r ::: r = rV + r/m */
      /* VPADDQ = VEX.NDS.128.66.0F.WIG D4 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x2 );
         goto decode_success;
      }
      /* VPADDQ r/m, rV, r ::: r = rV + r/m */
      /* VPADDQ = VEX.NDS.256.66.0F.WIG D4 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x4 );
         goto decode_success;
      }
      /* VPMULLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D5 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x8 );
         goto decode_success;
      }
      /* VPMULLW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D5 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x16 );
         goto decode_success;
      }
      /* I can't even find any Intel docs for this one. */
      /* Basically: 66 0F D6 = MOVQ -- move 64 bits from G (lo half
         xmm) to E (mem or lo half xmm).  Looks like L==0(128), W==0. */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0==getRexW(pfx)/*this might be redundant, dunno*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (epartIsReg(modrm)) {
            /* fall through, awaiting test case */
            /* dst: lo half copied, hi half zeroed */
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            storeLE( mkexpr(addr), getXMMRegLane64( rG, 0 ));
            DIP("vmovq %s,%s\n", nameXMMReg(rG), dis_buf);
            goto decode_success;
         }
      }
      /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB xmm1, r32 */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PMOVMSKB_128( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VEX.256.66.0F.WIG D7 /r = VPMOVMSKB ymm1, r32 */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PMOVMSKB_256( vbi, pfx, delta );
         goto decode_success;
      }
      /* VPSUBUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D8 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux16 );
         goto decode_success;
      }
      /* VPSUBUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D8 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux32 );
         goto decode_success;
      }
      /* VPSUBUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D9 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux8 );
         goto decode_success;
      }
      /* VPSUBUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D9 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux16 );
         goto decode_success;
      }
      /* VPMINUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DA /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux16 );
         goto decode_success;
      }
      /* VPMINUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DA /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux32 );
         goto decode_success;
      }
      /* VPAND r/m, rV, r ::: r = rV & r/m */
      /* VEX.NDS.128.66.0F.WIG DB /r = VPAND xmm3/m128, xmm2, xmm1 */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV128 );
         goto decode_success;
      }
      /* VPAND r/m, rV, r ::: r = rV & r/m */
      /* VEX.NDS.256.66.0F.WIG DB /r = VPAND ymm3/m256, ymm2, ymm1 */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV256 );
         goto decode_success;
      }
      /* VPADDUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DC /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux16 );
         goto decode_success;
      }
      /* VPADDUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DC /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux32 );
         goto decode_success;
      }
      /* VPADDUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DD /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux8 );
         goto decode_success;
      }
      /* VPADDUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DD /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux16 );
         goto decode_success;
      }
      /* VPMAXUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DE /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux16 );
         goto decode_success;
      }
      /* VPMAXUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DE /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux32 );
         goto decode_success;
      }
      /* VPANDN r/m, rV, r ::: r = rV & ~r/m (is that correct, re the ~ ?) */
      /* VEX.NDS.128.66.0F.WIG DF /r = VPANDN xmm3/m128, xmm2, xmm1 */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV128,
                    NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
         goto decode_success;
      }
      /* VPANDN r/m, rV, r ::: r = rV & ~r/m (is that correct, re the ~ ?) */
      /* VEX.NDS.256.66.0F.WIG DF /r = VPANDN ymm3/m256, ymm2, ymm1 */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
                    uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV256,
                    NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
         goto decode_success;
      }
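      /* Re the query in the VPANDN comments above: architecturally, VPANDN
         computes dst = (NOT src1) AND src2, where src1 is the register
         selected by vvvv and src2 is the r/m operand -- i.e. it is the vvvv
         operand that gets inverted, not r/m. */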
      /* VPAVGB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E0 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux16 );
         goto decode_success;
      }
      /* VPAVGB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E0 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux32 );
         goto decode_success;
      }
      /* VPSRAW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E1 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
                                        "vpsraw", Iop_SarN16x8 );
         goto decode_success;
      }
      /* VPSRAW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E1 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
                                        "vpsraw", Iop_SarN16x16 );
         goto decode_success;
      }
      /* VPSRAD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E2 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
                                        "vpsrad", Iop_SarN32x4 );
         goto decode_success;
      }
      /* VPSRAD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E2 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
                                        "vpsrad", Iop_SarN32x8 );
         goto decode_success;
      }
      /* VPAVGW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E3 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux8 );
         goto decode_success;
      }
      /* VPAVGW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E3 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux16 );
         goto decode_success;
      }
      /* VPMULHUW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E4 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux8 );
         goto decode_success;
      }
      /* VPMULHUW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E4 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux16 );
         goto decode_success;
      }
      /* VPMULHW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E5 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx8 );
         goto decode_success;
      }
      /* VPMULHW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E5 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx16 );
         goto decode_success;
      }
      /* VCVTDQ2PD xmm2/m64, xmm1 = VEX.128.F3.0F.WIG E6 /r */
      if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_CVTDQ2PD_128(vbi, pfx, delta, True/*isAvx*/);
         goto decode_success;
      }
      /* VCVTDQ2PD xmm2/m128, ymm1 = VEX.256.F3.0F.WIG E6 /r */
      if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_CVTDQ2PD_256(vbi, pfx, delta);
         goto decode_success;
      }
      /* VCVTTPD2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG E6 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
                                   True/*r2zero*/);
         goto decode_success;
      }
      /* VCVTTPD2DQ ymm2/m256, xmm1 = VEX.256.66.0F.WIG E6 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, True/*r2zero*/);
         goto decode_success;
      }
      /* VCVTPD2DQ xmm2/m128, xmm1 = VEX.128.F2.0F.WIG E6 /r */
      if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
                                   False/*!r2zero*/);
         goto decode_success;
      }
      /* VCVTPD2DQ ymm2/m256, xmm1 = VEX.256.F2.0F.WIG E6 /r */
      if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, False/*!r2zero*/);
         goto decode_success;
      }
      /* VMOVNTDQ xmm1, m128 = VEX.128.66.0F.WIG E7 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (!epartIsReg(modrm)) {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_16_aligned( addr );
            storeLE( mkexpr(addr), getXMMReg(rG) );
            DIP("vmovntdq %s,%s\n", dis_buf, nameXMMReg(rG));
            goto decode_success;
         }
         /* else fall through */
      }
      /* VMOVNTDQ ymm1, m256 = VEX.256.66.0F.WIG E7 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar modrm = getUChar(delta);
         UInt  rG    = gregOfRexRM(pfx,modrm);
         if (!epartIsReg(modrm)) {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            gen_SEGV_if_not_32_aligned( addr );
            storeLE( mkexpr(addr), getYMMReg(rG) );
            DIP("vmovntdq %s,%s\n", dis_buf, nameYMMReg(rG));
            goto decode_success;
         }
         /* else fall through */
      }
      /* VPSUBSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E8 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx16 );
         goto decode_success;
      }
      /* VPSUBSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E8 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx32 );
         goto decode_success;
      }
      /* VPSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E9 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx8 );
         goto decode_success;
      }
      /* VPSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E9 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx16 );
         goto decode_success;
      }
      /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
      /* VPMINSW = VEX.NDS.128.66.0F.WIG EA /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx8 );
         goto decode_success;
      }
      /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
      /* VPMINSW = VEX.NDS.256.66.0F.WIG EA /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx16 );
         goto decode_success;
      }
      /* VPOR r/m, rV, r ::: r = rV | r/m */
      /* VPOR = VEX.NDS.128.66.0F.WIG EB /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV128 );
         goto decode_success;
      }
      /* VPOR r/m, rV, r ::: r = rV | r/m */
      /* VPOR = VEX.NDS.256.66.0F.WIG EB /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV256 );
         goto decode_success;
      }
      /* VPADDSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG EC /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx16 );
         goto decode_success;
      }
      /* VPADDSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG EC /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx32 );
         goto decode_success;
      }
      /* VPADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG ED /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx8 );
         goto decode_success;
      }
      /* VPADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG ED /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_E_V_to_G(
                    uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx16 );
         goto decode_success;
      }
      /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
      /* VPMAXSW = VEX.NDS.128.66.0F.WIG EE /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx8 );
         goto decode_success;
      }
      /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
      /* VPMAXSW = VEX.NDS.256.66.0F.WIG EE /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx16 );
         goto decode_success;
      }
      /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
      /* VPXOR = VEX.NDS.128.66.0F.WIG EF /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV128 );
         goto decode_success;
      }
      /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
      /* VPXOR = VEX.NDS.256.66.0F.WIG EF /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV256 );
         goto decode_success;
      }
      /* VLDDQU m256, ymm1 = VEX.256.F2.0F.WIG F0 /r */
      if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar  modrm = getUChar(delta);
         UInt   rD    = gregOfRexRM(pfx, modrm);
         IRTemp tD    = newTemp(Ity_V256);
         if (epartIsReg(modrm)) break;
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign(tD, loadLE(Ity_V256, mkexpr(addr)));
         DIP("vlddqu %s,%s\n", dis_buf, nameYMMReg(rD));
         putYMMReg(rD, mkexpr(tD));
         goto decode_success;
      }
      /* VLDDQU m128, xmm1 = VEX.128.F2.0F.WIG F0 /r */
      if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar  modrm = getUChar(delta);
         UInt   rD    = gregOfRexRM(pfx, modrm);
         IRTemp tD    = newTemp(Ity_V128);
         if (epartIsReg(modrm)) break;
         addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
         assign(tD, loadLE(Ity_V128, mkexpr(addr)));
         DIP("vlddqu %s,%s\n", dis_buf, nameXMMReg(rD));
         putYMMRegLoAndZU(rD, mkexpr(tD));
         goto decode_success;
      }
      /* VPSLLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F1 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
                                        "vpsllw", Iop_ShlN16x8 );
         goto decode_success;
      }
      /* VPSLLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F1 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
                                        "vpsllw", Iop_ShlN16x16 );
         goto decode_success;
      }
      /* VPSLLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F2 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
                                        "vpslld", Iop_ShlN32x4 );
         goto decode_success;
      }
      /* VPSLLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F2 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
                                        "vpslld", Iop_ShlN32x8 );
         goto decode_success;
      }
      /* VPSLLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F3 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
                                        "vpsllq", Iop_ShlN64x2 );
         goto decode_success;
      }
      /* VPSLLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F3 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
                                        "vpsllq", Iop_ShlN64x4 );
         goto decode_success;
      }
      /* VPMULUDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F4 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta,
                    "vpmuludq", math_PMULUDQ_128 );
         goto decode_success;
      }
      /* VPMULUDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F4 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta,
                    "vpmuludq", math_PMULUDQ_256 );
         goto decode_success;
      }
      /* VPMADDWD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F5 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta,
                    "vpmaddwd", math_PMADDWD_128 );
         goto decode_success;
      }
      /* VPMADDWD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F5 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta,
                    "vpmaddwd", math_PMADDWD_256 );
         goto decode_success;
      }
      /* VPSADBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F6 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta,
                    "vpsadbw", math_PSADBW_128 );
         goto decode_success;
      }
      /* VPSADBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F6 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta,
                    "vpsadbw", math_PSADBW_256 );
         goto decode_success;
      }
      /* VMASKMOVDQU xmm2, xmm1 = VEX.128.66.0F.WIG F7 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && epartIsReg(getUChar(delta))) {
         delta = dis_MASKMOVDQU( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      /* VPSUBB r/m, rV, r ::: r = rV - r/m */
      /* VPSUBB = VEX.NDS.128.66.0F.WIG F8 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x16 );
         goto decode_success;
      }
      /* VPSUBB r/m, rV, r ::: r = rV - r/m */
      /* VPSUBB = VEX.NDS.256.66.0F.WIG F8 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x32 );
         goto decode_success;
      }
      /* VPSUBW r/m, rV, r ::: r = rV - r/m */
      /* VPSUBW = VEX.NDS.128.66.0F.WIG F9 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x8 );
         goto decode_success;
      }
      /* VPSUBW r/m, rV, r ::: r = rV - r/m */
      /* VPSUBW = VEX.NDS.256.66.0F.WIG F9 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x16 );
         goto decode_success;
      }
      /* VPSUBD r/m, rV, r ::: r = rV - r/m */
      /* VPSUBD = VEX.NDS.128.66.0F.WIG FA /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x4 );
         goto decode_success;
      }
      /* VPSUBD r/m, rV, r ::: r = rV - r/m */
      /* VPSUBD = VEX.NDS.256.66.0F.WIG FA /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x8 );
         goto decode_success;
      }
      /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
      /* VPSUBQ = VEX.NDS.128.66.0F.WIG FB /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x2 );
         goto decode_success;
      }
      /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
      /* VPSUBQ = VEX.NDS.256.66.0F.WIG FB /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x4 );
         goto decode_success;
      }
      /* VPADDB r/m, rV, r ::: r = rV + r/m */
      /* VPADDB = VEX.NDS.128.66.0F.WIG FC /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x16 );
         goto decode_success;
      }
      /* VPADDB r/m, rV, r ::: r = rV + r/m */
      /* VPADDB = VEX.NDS.256.66.0F.WIG FC /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x32 );
         goto decode_success;
      }
      /* VPADDW r/m, rV, r ::: r = rV + r/m */
      /* VPADDW = VEX.NDS.128.66.0F.WIG FD /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x8 );
         goto decode_success;
      }
      /* VPADDW r/m, rV, r ::: r = rV + r/m */
      /* VPADDW = VEX.NDS.256.66.0F.WIG FD /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x16 );
         goto decode_success;
      }
      /* VPADDD r/m, rV, r ::: r = rV + r/m */
      /* VPADDD = VEX.NDS.128.66.0F.WIG FE /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x4 );
         goto decode_success;
      }
      /* VPADDD r/m, rV, r ::: r = rV + r/m */
      /* VPADDD = VEX.NDS.256.66.0F.WIG FE /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
                    uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x8 );
         goto decode_success;
      }
/*------------------------------------------------------------*/
/*--- Top-level post-escape decoders: dis_ESC_0F38__VEX    ---*/
/*------------------------------------------------------------*/
static IRTemp math_PERMILPS_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
{
   /* In the control vector, zero out all but the bottom two bits of
      each 32-bit lane. */
   IRExpr* cv1 = binop(Iop_ShrN32x4,
                       binop(Iop_ShlN32x4, mkexpr(ctrlV), mkU8(30)),
                       mkU8(30));
   /* And use the resulting cleaned-up control vector as steering
      in a Perm operation. */
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_Perm32x4, mkexpr(dataV), cv1));
   return res;
}

static IRTemp math_PERMILPS_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
{
   IRTemp dHi, dLo, cHi, cLo;
   dHi = dLo = cHi = cLo = IRTemp_INVALID;
   breakupV256toV128s( dataV, &dHi, &dLo );
   breakupV256toV128s( ctrlV, &cHi, &cLo );
   IRTemp rHi = math_PERMILPS_VAR_128( dHi, cHi );
   IRTemp rLo = math_PERMILPS_VAR_128( dLo, cLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
   return res;
}
static IRTemp math_PERMILPD_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
{
   /* No cleverness here .. */
   IRTemp dHi, dLo, cHi, cLo;
   dHi = dLo = cHi = cLo = IRTemp_INVALID;
   breakupV128to64s( dataV, &dHi, &dLo );
   breakupV128to64s( ctrlV, &cHi, &cLo );
   IRExpr* rHi
      = IRExpr_ITE( unop(Iop_64to1,
                         binop(Iop_Shr64, mkexpr(cHi), mkU8(1))),
                    mkexpr(dHi), mkexpr(dLo) );
   IRExpr* rLo
      = IRExpr_ITE( unop(Iop_64to1,
                         binop(Iop_Shr64, mkexpr(cLo), mkU8(1))),
                    mkexpr(dHi), mkexpr(dLo) );
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128, rHi, rLo));
   return res;
}
static IRTemp math_PERMILPD_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
{
   IRTemp dHi, dLo, cHi, cLo;
   dHi = dLo = cHi = cLo = IRTemp_INVALID;
   breakupV256toV128s( dataV, &dHi, &dLo );
   breakupV256toV128s( ctrlV, &cHi, &cLo );
   IRTemp rHi = math_PERMILPD_VAR_128( dHi, cHi );
   IRTemp rLo = math_PERMILPD_VAR_128( dLo, cLo );
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
   return res;
}
static IRTemp math_VPERMD ( IRTemp ctrlV, IRTemp dataV )
{
   /* In the control vector, zero out all but the bottom three bits of
      each 32-bit lane. */
   IRExpr* cv1 = binop(Iop_ShrN32x8,
                       binop(Iop_ShlN32x8, mkexpr(ctrlV), mkU8(29)),
                       mkU8(29));
   /* And use the resulting cleaned-up control vector as steering
      in a Perm operation. */
   IRTemp res = newTemp(Ity_V256);
   assign(res, binop(Iop_Perm32x8, mkexpr(dataV), cv1));
   return res;
}
static Long dis_SHIFTX ( /*OUT*/Bool* uses_vvvv,
                         const VexAbiInfo* vbi,
                         Prefix pfx, Long delta,
                         const HChar* opname, IROp op8 )
{
   HChar   dis_buf[50];
   Int     alen;
   Int     size = getRexW(pfx) ? 8 : 4;
   IRType  ty   = szToITy(size);
   IRTemp  src  = newTemp(ty);
   IRTemp  amt  = newTemp(ty);
   UChar   rm   = getUChar(delta);

   assign( amt, getIRegV(size,pfx) );
   if (epartIsReg(rm)) {
      assign( src, getIRegE(size,pfx,rm) );
      DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx),
          nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
      delta++;
   } else {
      IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( src, loadLE(ty, mkexpr(addr)) );
      DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx), dis_buf,
          nameIRegG(size,pfx,rm));
      delta += alen;
   }

   putIRegG( size, pfx, rm,
             binop(mkSizedOp(ty,op8), mkexpr(src),
                   narrowTo(Ity_I8, binop(mkSizedOp(ty,Iop_And8), mkexpr(amt),
                                          mkU(ty,8*size-1)))) );
   /* Flags aren't modified. */
   *uses_vvvv = True;
   return delta;
}
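/* Note (illustration, not in the original source): BMI2 SARX/SHLX/SHRX
   mask the shift amount to the operand width, hence the "& (8*size-1)"
   above.  For example "shlx %rax, %rbx, %rcx" with %rax holding 68
   shifts by 68 & 63 == 4, and, unlike SHL, leaves the flags alone. */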
static Long dis_FMA ( const VexAbiInfo* vbi, Prefix pfx, Long delta, UChar opc )
{
   UChar  modrm   = getUChar(delta);
   UInt   rG      = gregOfRexRM(pfx, modrm);
   UInt   rV      = getVexNvvvv(pfx);
   Bool   scalar  = (opc & 0xF) > 7 && (opc & 1);
   IRType ty      = getRexW(pfx) ? Ity_F64 : Ity_F32;
   IRType vty     = scalar ? ty : (getVexL(pfx) ? Ity_V256 : Ity_V128);
   IRTemp addr    = IRTemp_INVALID;
   HChar  dis_buf[50];
   Int    alen    = 0;
   const HChar* name;
   const HChar* suffix;
   const HChar* order;
   Bool   negateRes   = False;
   Bool   negateZeven = False;
   Bool   negateZodd  = False;
   UInt   count = 0, i;

   switch (opc & 0xF) {
   case 0x6: name = "addsub"; negateZeven = True; break;
   case 0x7: name = "subadd"; negateZodd = True; break;
   case 0x8:
   case 0x9: name = "add"; break;
   case 0xA:
   case 0xB: name = "sub"; negateZeven = True; negateZodd = True;
             break;
   case 0xC:
   case 0xD: name = "add"; negateRes = True; negateZeven = True;
             negateZodd = True; break;
   case 0xE:
   case 0xF: name = "sub"; negateRes = True; break;
   default:  vpanic("dis_FMA(amd64)"); break;
   }
   switch (opc & 0xF0) {
   case 0x90: order = "132"; break;
   case 0xA0: order = "213"; break;
   case 0xB0: order = "231"; break;
   default: vpanic("dis_FMA(amd64)"); break;
   }
   if (scalar) {
      suffix = ty == Ity_F64 ? "sd" : "ss";
   } else {
      suffix = ty == Ity_F64 ? "pd" : "ps";
   }

   // Figure out |count| (the number of elements) by considering |vty| and |ty|.
   count = sizeofIRType(vty) / sizeofIRType(ty);
   vassert(count == 1 || count == 2 || count == 4 || count == 8);

   // Fetch operands into the first |count| elements of |sX|, |sY| and |sZ|.
   IRExpr *sX[8], *sY[8], *sZ[8], *res[8];
   for (i = 0; i < 8; i++) sX[i] = sY[i] = sZ[i] = res[i] = NULL;

   IRExpr* (*getYMMRegLane)(UInt,Int)
      = ty == Ity_F32 ? getYMMRegLane32F : getYMMRegLane64F;
   void (*putYMMRegLane)(UInt,Int,IRExpr*)
      = ty == Ity_F32 ? putYMMRegLane32F : putYMMRegLane64F;

   for (i = 0; i < count; i++) {
      sX[i] = getYMMRegLane(rG, i);
      sZ[i] = getYMMRegLane(rV, i);
   }

   if (epartIsReg(modrm)) {
      UInt rE = eregOfRexRM(pfx, modrm);
      delta += 1;
      for (i = 0; i < count; i++) {
         sY[i] = getYMMRegLane(rE, i);
      }
      if (vty == Ity_V256) {
         DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
             name, order, suffix, nameYMMReg(rE), nameYMMReg(rV),
             nameYMMReg(rG));
      } else {
         DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
             name, order, suffix, nameXMMReg(rE), nameXMMReg(rV),
             nameXMMReg(rG));
      }
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
      delta += alen;
      for (i = 0; i < count; i++) {
         sY[i] = loadLE(ty, binop(Iop_Add64, mkexpr(addr),
                                  mkU64(i * sizeofIRType(ty))));
      }
      if (vty == Ity_V256) {
         DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
             name, order, suffix, dis_buf, nameYMMReg(rV),
             nameYMMReg(rG));
      } else {
         DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
             name, order, suffix, dis_buf, nameXMMReg(rV),
             nameXMMReg(rG));
      }
   }

   /* vX/vY/vZ are now in 132 order.  If the instruction requires a different
      order, swap them around. */

#  define COPY_ARR(_dst, _src) \
      do { for (int j = 0; j < 8; j++) { _dst[j] = _src[j]; } } while (0)

   if ((opc & 0xF0) != 0x90) {
      IRExpr* temp[8];
      COPY_ARR(temp, sX);
      if ((opc & 0xF0) == 0xA0) {
         COPY_ARR(sX, sZ);
         COPY_ARR(sZ, sY);
         COPY_ARR(sY, temp);
      } else {
         COPY_ARR(sX, sZ);
         COPY_ARR(sZ, temp);
      }
   }

#  undef COPY_ARR

   for (i = 0; i < count; i++) {
      IROp opNEG = ty == Ity_F64 ? Iop_NegF64 : Iop_NegF32;
      if ((i & 1) ? negateZodd : negateZeven) {
         sZ[i] = unop(opNEG, sZ[i]);
      }
      res[i] = IRExpr_Qop(ty == Ity_F64 ? Iop_MAddF64 : Iop_MAddF32,
                          get_FAKE_roundingmode(), sX[i], sY[i], sZ[i]);
      if (negateRes) {
         res[i] = unop(opNEG, res[i]);
      }
   }

   for (i = 0; i < count; i++) {
      putYMMRegLane(rG, i, res[i]);
   }

   switch (vty) {
      case Ity_F32:  putYMMRegLane32(rG, 1, mkU32(0)); /*fallthru*/
      case Ity_F64:  putYMMRegLane64(rG, 1, mkU64(0)); /*fallthru*/
      case Ity_V128: putYMMRegLane128(rG, 1, mkV128(0)); /*fallthru*/
      case Ity_V256: break;
      default: vassert(0);
   }

   return delta;
}
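/* Note (illustration, not in the original source): in the 0F38 FMA map
   decoded via dis_FMA, bits 7:4 of the opcode pick the operand order
   (0x90 = "132", 0xA0 = "213", 0xB0 = "231") and bits 3:0 the operation,
   e.g. 0xA8 is vfmadd213p[sd] and 0xBD is vfnmadd231s[sd]; the scalar
   forms are the odd low nibbles above 7, which is what |scalar| tests. */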
/* Masked load or masked store. */
static ULong dis_VMASKMOV ( Bool *uses_vvvv, const VexAbiInfo* vbi,
                            Prefix pfx, Long delta,
                            const HChar* opname, Bool isYMM, IRType ty,
                            Bool isLoad )
{
   HChar   dis_buf[50];
   Int     alen, i;
   IRTemp  addr;
   UChar   modrm = getUChar(delta);
   UInt    rG    = gregOfRexRM(pfx,modrm);
   UInt    rV    = getVexNvvvv(pfx);

   addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
   delta += alen;

   /**/ if (isLoad && isYMM) {
      DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
   }
   else if (isLoad && !isYMM) {
      DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
   }
   else if (!isLoad && isYMM) {
      DIP("%s %s,%s,%s\n", opname, nameYMMReg(rG), nameYMMReg(rV), dis_buf );
   }
   else {
      vassert(!isLoad && !isYMM);
      DIP("%s %s,%s,%s\n", opname, nameXMMReg(rG), nameXMMReg(rV), dis_buf );
   }

   vassert(ty == Ity_I32 || ty == Ity_I64);
   Bool laneIs32 = ty == Ity_I32;

   Int nLanes = (isYMM ? 2 : 1) * (laneIs32 ? 4 : 2);

   for (i = 0; i < nLanes; i++) {
      IRExpr* shAmt = laneIs32 ? mkU8(31)    : mkU8(63);
      IRExpr* one   = laneIs32 ? mkU32(1)    : mkU64(1);
      IROp    opSHR = laneIs32 ? Iop_Shr32   : Iop_Shr64;
      IROp    opEQ  = laneIs32 ? Iop_CmpEQ32 : Iop_CmpEQ64;
      IRExpr* lane  = (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rV, i );

      IRTemp  cond = newTemp(Ity_I1);
      assign(cond, binop(opEQ, binop(opSHR, lane, shAmt), one));

      IRTemp  data = newTemp(ty);
      IRExpr* ea   = binop(Iop_Add64, mkexpr(addr),
                           mkU64(i * (laneIs32 ? 4 : 8)));
      if (isLoad) {
         stmt(
            IRStmt_LoadG(
               Iend_LE, laneIs32 ? ILGop_Ident32 : ILGop_Ident64,
               data, ea, laneIs32 ? mkU32(0) : mkU64(0), mkexpr(cond)
         ));
         (laneIs32 ? putYMMRegLane32 : putYMMRegLane64)( rG, i, mkexpr(data) );
      } else {
         assign(data, (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rG, i ));
         stmt( IRStmt_StoreG(Iend_LE, ea, mkexpr(data), mkexpr(cond)) );
      }
   }

   if (isLoad && !isYMM)
      putYMMRegLane128( rG, 1, mkV128(0) );

   *uses_vvvv = True;
   return delta;
}
static ULong dis_VGATHER ( Bool *uses_vvvv, const VexAbiInfo* vbi,
                           Prefix pfx, Long delta,
                           const HChar* opname, Bool isYMM,
                           Bool isVM64x, IRType ty )
{
   HChar  dis_buf[50];
   Int    alen, i, vscale, count1, count2;
   IRTemp addr;
   UChar  modrm = getUChar(delta);
   UInt   rG    = gregOfRexRM(pfx,modrm);
   UInt   rV    = getVexNvvvv(pfx);
   UInt   rI;
   IRType dstTy = (isYMM && (ty == Ity_I64 || !isVM64x)) ? Ity_V256 : Ity_V128;
   IRType idxTy = (isYMM && (ty == Ity_I32 || isVM64x)) ? Ity_V256 : Ity_V128;
   IRTemp cond;
   addr = disAVSIBMode ( &alen, vbi, pfx, delta, dis_buf, &rI,
                         idxTy, &vscale );
   if (addr == IRTemp_INVALID || rI == rG || rI == rV || rG == rV)
      return delta;
   if (dstTy == Ity_V256) {
      DIP("%s %s,%s,%s\n", opname, nameYMMReg(rV), dis_buf, nameYMMReg(rG) );
   } else {
      DIP("%s %s,%s,%s\n", opname, nameXMMReg(rV), dis_buf, nameXMMReg(rG) );
   }
   delta += alen;

   if (ty == Ity_I32) {
      count1 = isYMM ? 8 : 4;
      count2 = isVM64x ? count1 / 2 : count1;
   } else {
      count1 = count2 = isYMM ? 4 : 2;
   }

   /* First update the mask register to copies of the sign bit. */
   if (ty == Ity_I32) {
      if (isYMM)
         putYMMReg( rV, binop(Iop_SarN32x8, getYMMReg( rV ), mkU8(31)) );
      else
         putYMMRegLoAndZU( rV, binop(Iop_SarN32x4, getXMMReg( rV ), mkU8(31)) );
   } else {
      for (i = 0; i < count1; i++) {
         putYMMRegLane64( rV, i, binop(Iop_Sar64, getYMMRegLane64( rV, i ),
                                       mkU8(63)) );
      }
   }

   /* Next gather the individual elements.  If any fault occurs, the
      corresponding mask element will be set and the loop stops. */
   for (i = 0; i < count2; i++) {
      IRExpr *expr, *addr_expr;
      cond = newTemp(Ity_I1);
      assign( cond,
              binop(ty == Ity_I32 ? Iop_CmpLT32S : Iop_CmpLT64S,
                    ty == Ity_I32 ? getYMMRegLane32( rV, i )
                                  : getYMMRegLane64( rV, i ),
                    mkU(ty, 0)) );
      expr = ty == Ity_I32 ? getYMMRegLane32( rG, i )
                           : getYMMRegLane64( rG, i );
      addr_expr = isVM64x ? getYMMRegLane64( rI, i )
                          : unop(Iop_32Sto64, getYMMRegLane32( rI, i ));
      switch (vscale) {
         case 2: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(1)); break;
         case 4: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(2)); break;
         case 8: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(3)); break;
         default: break;
      }
      addr_expr = binop(Iop_Add64, mkexpr(addr), addr_expr);
      addr_expr = handleAddrOverrides(vbi, pfx, addr_expr);
      addr_expr = IRExpr_ITE(mkexpr(cond), addr_expr, getIReg64(R_RSP));
      expr = IRExpr_ITE(mkexpr(cond), loadLE(ty, addr_expr), expr);
      if (ty == Ity_I32) {
         putYMMRegLane32( rG, i, expr );
         putYMMRegLane32( rV, i, mkU32(0) );
      } else {
         putYMMRegLane64( rG, i, expr );
         putYMMRegLane64( rV, i, mkU64(0) );
      }
   }

   if (!isYMM || (ty == Ity_I32 && isVM64x)) {
      if (ty == Ity_I64 || isYMM)
         putYMMRegLane128( rV, 1, mkV128(0) );
      else if (ty == Ity_I32 && count2 == 2) {
         putYMMRegLane64( rV, 1, mkU64(0) );
         putYMMRegLane64( rG, 1, mkU64(0) );
      }
      putYMMRegLane128( rG, 1, mkV128(0) );
   }

   *uses_vvvv = True;
   return delta;
}
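/* Note (illustration, not in the original source): an inactive gather
   lane must not fault, so its effective address is rewritten above to
   the guest %rsp and the loaded value is then discarded by IRExpr_ITE;
   the mask is saturated to per-lane all-ones or all-zeroes first and is
   cleared lane by lane as each element completes, which mirrors the
   restartable behaviour of the VGATHER family. */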
__attribute__((noinline))
static
Long dis_ESC_0F38__VEX (
        /*MB_OUT*/DisResult* dres,
        /*OUT*/   Bool*      uses_vvvv,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  vbi,
        Prefix pfx, Int sz, Long deltaIN
     )
{
   IRTemp addr  = IRTemp_INVALID;
   Int    alen  = 0;
   HChar  dis_buf[50];
   Long   delta = deltaIN;
   UChar  opc   = getUChar(delta);
   delta++;
   *uses_vvvv = False;

   switch (opc) {

   case 0x00:
      /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
      /* VPSHUFB = VEX.NDS.128.66.0F38.WIG 00 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_XMM );
         goto decode_success;
      }
      /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
      /* VPSHUFB = VEX.NDS.256.66.0F38.WIG 00 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_YMM );
         goto decode_success;
      }
      break;

   case 0x01:
   case 0x02:
   case 0x03:
      /* VPHADDW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 01 /r */
      /* VPHADDD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 02 /r */
      /* VPHADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 03 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VPHADDW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 01 /r */
      /* VPHADDD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 02 /r */
      /* VPHADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 03 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PHADD_256( vbi, pfx, delta, opc );
         *uses_vvvv = True;
         goto decode_success;
      }
      break;

   case 0x04:
      /* VPMADDUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 04 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
                    math_PMADDUBSW_128 );
         goto decode_success;
      }
      /* VPMADDUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 04 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
                    uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
                    math_PMADDUBSW_256 );
         goto decode_success;
      }
      break;

   case 0x05:
   case 0x06:
   case 0x07:
      /* VPHSUBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 05 /r */
      /* VPHSUBD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 06 /r */
      /* VPHSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 07 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VPHSUBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 05 /r */
      /* VPHSUBD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 06 /r */
      /* VPHSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 07 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_PHADD_256( vbi, pfx, delta, opc );
         *uses_vvvv = True;
         goto decode_success;
      }
      break;
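   /* Note (illustration, not in the original source): the horizontal forms
      pair up adjacent lanes within each source, e.g. for vphaddw the low
      half of the result is { rV[1]+rV[0], rV[3]+rV[2], ... } and the high
      half comes from the r/m operand; opc is passed through unchanged so
      dis_PHADD_128/256 can select add, saturating add or subtract. */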
   case 0x08:
   case 0x09:
   case 0x0A:
      /* VPSIGNB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 08 /r */
      /* VPSIGNW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 09 /r */
      /* VPSIGND xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0A /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         IRTemp sV      = newTemp(Ity_V128);
         IRTemp dV      = newTemp(Ity_V128);
         IRTemp sHi, sLo, dHi, dLo;
         sHi = sLo = dHi = dLo = IRTemp_INVALID;
         HChar  ch      = '?';
         Int    laneszB = 0;
         UChar  modrm   = getUChar(delta);
         UInt   rG      = gregOfRexRM(pfx,modrm);
         UInt   rV      = getVexNvvvv(pfx);

         switch (opc) {
            case 0x08: laneszB = 1; ch = 'b'; break;
            case 0x09: laneszB = 2; ch = 'w'; break;
            case 0x0A: laneszB = 4; ch = 'd'; break;
            default: vassert(0);
         }

         assign( dV, getXMMReg(rV) );

         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            delta += 1;
            DIP("vpsign%c %s,%s,%s\n", ch, nameXMMReg(rE),
                nameXMMReg(rV), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
                nameXMMReg(rV), nameXMMReg(rG));
         }

         breakupV128to64s( dV, &dHi, &dLo );
         breakupV128to64s( sV, &sHi, &sLo );

         putYMMRegLoAndZU(
            rG,
            binop(Iop_64HLtoV128,
                  dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
                  dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
            )
         );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VPSIGNB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 08 /r */
      /* VPSIGNW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 09 /r */
      /* VPSIGND ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0A /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         IRTemp sV      = newTemp(Ity_V256);
         IRTemp dV      = newTemp(Ity_V256);
         IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
         s3 = s2 = s1 = s0 = IRTemp_INVALID;
         d3 = d2 = d1 = d0 = IRTemp_INVALID;
         HChar  ch      = '?';
         Int    laneszB = 0;
         UChar  modrm   = getUChar(delta);
         UInt   rG      = gregOfRexRM(pfx,modrm);
         UInt   rV      = getVexNvvvv(pfx);

         switch (opc) {
            case 0x08: laneszB = 1; ch = 'b'; break;
            case 0x09: laneszB = 2; ch = 'w'; break;
            case 0x0A: laneszB = 4; ch = 'd'; break;
            default: vassert(0);
         }

         assign( dV, getYMMReg(rV) );

         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getYMMReg(rE) );
            delta += 1;
            DIP("vpsign%c %s,%s,%s\n", ch, nameYMMReg(rE),
                nameYMMReg(rV), nameYMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
            delta += alen;
            DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
                nameYMMReg(rV), nameYMMReg(rG));
         }

         breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
         breakupV256to64s( sV, &s3, &s2, &s1, &s0 );

         putYMMReg(
            rG,
            binop( Iop_V128HLtoV256,
                   binop(Iop_64HLtoV128,
                         dis_PSIGN_helper( mkexpr(s3), mkexpr(d3), laneszB ),
                         dis_PSIGN_helper( mkexpr(s2), mkexpr(d2), laneszB )
                   ),
                   binop(Iop_64HLtoV128,
                         dis_PSIGN_helper( mkexpr(s1), mkexpr(d1), laneszB ),
                         dis_PSIGN_helper( mkexpr(s0), mkexpr(d0), laneszB )
                   )
            )
         );
         *uses_vvvv = True;
         goto decode_success;
      }
      break;
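   /* Note (illustration, not in the original source): PSIGNB/W/D keeps,
      zeroes or negates each destination lane according to whether the
      corresponding source lane is positive, zero or negative;
      dis_PSIGN_helper, defined earlier in this file, does that one 64-bit
      chunk at a time, so the 128- and 256-bit cases above differ only in
      how many chunks they stitch back together. */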
   case 0x0B:
      /* VPMULHRSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0B /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         IRTemp sV      = newTemp(Ity_V128);
         IRTemp dV      = newTemp(Ity_V128);
         IRTemp sHi, sLo, dHi, dLo;
         sHi = sLo = dHi = dLo = IRTemp_INVALID;
         UChar  modrm   = getUChar(delta);
         UInt   rG      = gregOfRexRM(pfx,modrm);
         UInt   rV      = getVexNvvvv(pfx);

         assign( dV, getXMMReg(rV) );

         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getXMMReg(rE) );
            delta += 1;
            DIP("vpmulhrsw %s,%s,%s\n", nameXMMReg(rE),
                nameXMMReg(rV), nameXMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            delta += alen;
            DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
                nameXMMReg(rV), nameXMMReg(rG));
         }

         breakupV128to64s( dV, &dHi, &dLo );
         breakupV128to64s( sV, &sHi, &sLo );

         putYMMRegLoAndZU(
            rG,
            binop(Iop_64HLtoV128,
                  dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
                  dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
            )
         );
         *uses_vvvv = True;
         goto decode_success;
      }
      /* VPMULHRSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0B /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         IRTemp sV      = newTemp(Ity_V256);
         IRTemp dV      = newTemp(Ity_V256);
         IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
         s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
         UChar  modrm   = getUChar(delta);
         UInt   rG      = gregOfRexRM(pfx,modrm);
         UInt   rV      = getVexNvvvv(pfx);

         assign( dV, getYMMReg(rV) );

         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx,modrm);
            assign( sV, getYMMReg(rE) );
            delta += 1;
            DIP("vpmulhrsw %s,%s,%s\n", nameYMMReg(rE),
                nameYMMReg(rV), nameYMMReg(rG));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
            delta += alen;
            DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
                nameYMMReg(rV), nameYMMReg(rG));
         }

         breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
         breakupV256to64s( sV, &s3, &s2, &s1, &s0 );

         putYMMReg(
            rG,
            binop(Iop_V128HLtoV256,
                  binop(Iop_64HLtoV128,
                        dis_PMULHRSW_helper( mkexpr(s3), mkexpr(d3) ),
                        dis_PMULHRSW_helper( mkexpr(s2), mkexpr(d2) ) ),
                  binop(Iop_64HLtoV128,
                        dis_PMULHRSW_helper( mkexpr(s1), mkexpr(d1) ),
                        dis_PMULHRSW_helper( mkexpr(s0), mkexpr(d0) ) )
            )
         );
         *uses_vvvv = True;
         goto decode_success;
      }
      break;
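   /* Note (illustration, not in the original source): PMULHRSW computes,
      per 16-bit lane, (a*b + 0x4000) >> 15 on the full 32-bit product,
      i.e. a rounded high-half multiply; dis_PMULHRSW_helper, defined
      earlier in this file, builds that for one 64-bit chunk, which is why
      the cases above just reassemble 2 or 4 such chunks. */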
28318 /* VPERMILPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0C /r */
28319 if (have66noF2noF3(pfx
)
28320 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
28321 UChar modrm
= getUChar(delta
);
28322 UInt rG
= gregOfRexRM(pfx
, modrm
);
28323 UInt rV
= getVexNvvvv(pfx
);
28324 IRTemp ctrlV
= newTemp(Ity_V128
);
28325 if (epartIsReg(modrm
)) {
28326 UInt rE
= eregOfRexRM(pfx
, modrm
);
28328 DIP("vpermilps %s,%s,%s\n",
28329 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
28330 assign(ctrlV
, getXMMReg(rE
));
28332 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28334 DIP("vpermilps %s,%s,%s\n",
28335 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
28336 assign(ctrlV
, loadLE(Ity_V128
, mkexpr(addr
)));
28338 IRTemp dataV
= newTemp(Ity_V128
);
28339 assign(dataV
, getXMMReg(rV
));
28340 IRTemp resV
= math_PERMILPS_VAR_128(dataV
, ctrlV
);
28341 putYMMRegLoAndZU(rG
, mkexpr(resV
));
28343 goto decode_success
;
28345 /* VPERMILPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0C /r */
28346 if (have66noF2noF3(pfx
)
28347 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
28348 UChar modrm
= getUChar(delta
);
28349 UInt rG
= gregOfRexRM(pfx
, modrm
);
28350 UInt rV
= getVexNvvvv(pfx
);
28351 IRTemp ctrlV
= newTemp(Ity_V256
);
28352 if (epartIsReg(modrm
)) {
28353 UInt rE
= eregOfRexRM(pfx
, modrm
);
28355 DIP("vpermilps %s,%s,%s\n",
28356 nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
28357 assign(ctrlV
, getYMMReg(rE
));
28359 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28361 DIP("vpermilps %s,%s,%s\n",
28362 dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
28363 assign(ctrlV
, loadLE(Ity_V256
, mkexpr(addr
)));
28365 IRTemp dataV
= newTemp(Ity_V256
);
28366 assign(dataV
, getYMMReg(rV
));
28367 IRTemp resV
= math_PERMILPS_VAR_256(dataV
, ctrlV
);
28368 putYMMReg(rG
, mkexpr(resV
));
28370 goto decode_success
;
28375 /* VPERMILPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0D /r */
28376 if (have66noF2noF3(pfx
)
28377 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
28378 UChar modrm
= getUChar(delta
);
28379 UInt rG
= gregOfRexRM(pfx
, modrm
);
28380 UInt rV
= getVexNvvvv(pfx
);
28381 IRTemp ctrlV
= newTemp(Ity_V128
);
28382 if (epartIsReg(modrm
)) {
28383 UInt rE
= eregOfRexRM(pfx
, modrm
);
28385 DIP("vpermilpd %s,%s,%s\n",
28386 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
28387 assign(ctrlV
, getXMMReg(rE
));
28389 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28391 DIP("vpermilpd %s,%s,%s\n",
28392 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
28393 assign(ctrlV
, loadLE(Ity_V128
, mkexpr(addr
)));
28395 IRTemp dataV
= newTemp(Ity_V128
);
28396 assign(dataV
, getXMMReg(rV
));
28397 IRTemp resV
= math_PERMILPD_VAR_128(dataV
, ctrlV
);
28398 putYMMRegLoAndZU(rG
, mkexpr(resV
));
28400 goto decode_success
;
28402 /* VPERMILPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0D /r */
28403 if (have66noF2noF3(pfx
)
28404 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
28405 UChar modrm
= getUChar(delta
);
28406 UInt rG
= gregOfRexRM(pfx
, modrm
);
28407 UInt rV
= getVexNvvvv(pfx
);
28408 IRTemp ctrlV
= newTemp(Ity_V256
);
28409 if (epartIsReg(modrm
)) {
28410 UInt rE
= eregOfRexRM(pfx
, modrm
);
28412 DIP("vpermilpd %s,%s,%s\n",
28413 nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
28414 assign(ctrlV
, getYMMReg(rE
));
28416 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28418 DIP("vpermilpd %s,%s,%s\n",
28419 dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
28420 assign(ctrlV
, loadLE(Ity_V256
, mkexpr(addr
)));
28422 IRTemp dataV
= newTemp(Ity_V256
);
28423 assign(dataV
, getYMMReg(rV
));
28424 IRTemp resV
= math_PERMILPD_VAR_256(dataV
, ctrlV
);
28425 putYMMReg(rG
, mkexpr(resV
));
28427 goto decode_success
;
28432 /* VTESTPS xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0E /r */
28433 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28434 delta
= dis_xTESTy_128( vbi
, pfx
, delta
, True
/*isAvx*/, 32 );
28435 goto decode_success
;
28437 /* VTESTPS ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0E /r */
28438 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28439 delta
= dis_xTESTy_256( vbi
, pfx
, delta
, 32 );
28440 goto decode_success
;
28445 /* VTESTPD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0F /r */
28446 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28447 delta
= dis_xTESTy_128( vbi
, pfx
, delta
, True
/*isAvx*/, 64 );
28448 goto decode_success
;
28450 /* VTESTPD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0F /r */
28451 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28452 delta
= dis_xTESTy_256( vbi
, pfx
, delta
, 64 );
28453 goto decode_success
;
28458 /* VPERMPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 16 /r */
28459 if (have66noF2noF3(pfx
)
28460 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
28461 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28462 uses_vvvv
, vbi
, pfx
, delta
, "vpermps", math_VPERMD
);
28463 goto decode_success
;
28468 /* VPTEST xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 17 /r */
28469 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28470 delta
= dis_xTESTy_128( vbi
, pfx
, delta
, True
/*isAvx*/, 0 );
28471 goto decode_success
;
28473 /* VPTEST ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 17 /r */
28474 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28475 delta
= dis_xTESTy_256( vbi
, pfx
, delta
, 0 );
28476 goto decode_success
;
28481 /* VBROADCASTSS m32, xmm1 = VEX.128.66.0F38.WIG 18 /r */
28482 if (have66noF2noF3(pfx
)
28483 && 0==getVexL(pfx
)/*128*/
28484 && !epartIsReg(getUChar(delta
))) {
28485 UChar modrm
= getUChar(delta
);
28486 UInt rG
= gregOfRexRM(pfx
, modrm
);
28487 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28489 DIP("vbroadcastss %s,%s\n", dis_buf
, nameXMMReg(rG
));
28490 IRTemp t32
= newTemp(Ity_I32
);
28491 assign(t32
, loadLE(Ity_I32
, mkexpr(addr
)));
28492 IRTemp t64
= newTemp(Ity_I64
);
28493 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
28494 IRExpr
* res
= binop(Iop_64HLtoV128
, mkexpr(t64
), mkexpr(t64
));
28495 putYMMRegLoAndZU(rG
, res
);
28496 goto decode_success
;
28498 /* VBROADCASTSS m32, ymm1 = VEX.256.66.0F38.WIG 18 /r */
28499 if (have66noF2noF3(pfx
)
28500 && 1==getVexL(pfx
)/*256*/
28501 && !epartIsReg(getUChar(delta
))) {
28502 UChar modrm
= getUChar(delta
);
28503 UInt rG
= gregOfRexRM(pfx
, modrm
);
28504 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28506 DIP("vbroadcastss %s,%s\n", dis_buf
, nameYMMReg(rG
));
28507 IRTemp t32
= newTemp(Ity_I32
);
28508 assign(t32
, loadLE(Ity_I32
, mkexpr(addr
)));
28509 IRTemp t64
= newTemp(Ity_I64
);
28510 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
28511 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
28512 mkexpr(t64
), mkexpr(t64
));
28513 putYMMReg(rG
, res
);
28514 goto decode_success
;
28516 /* VBROADCASTSS xmm2, xmm1 = VEX.128.66.0F38.WIG 18 /r */
28517 if (have66noF2noF3(pfx
)
28518 && 0==getVexL(pfx
)/*128*/
28519 && epartIsReg(getUChar(delta
))) {
28520 UChar modrm
= getUChar(delta
);
28521 UInt rG
= gregOfRexRM(pfx
, modrm
);
28522 UInt rE
= eregOfRexRM(pfx
, modrm
);
28523 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
28524 IRTemp t32
= newTemp(Ity_I32
);
28525 assign(t32
, getXMMRegLane32(rE
, 0));
28526 IRTemp t64
= newTemp(Ity_I64
);
28527 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
28528 IRExpr
* res
= binop(Iop_64HLtoV128
, mkexpr(t64
), mkexpr(t64
));
28529 putYMMRegLoAndZU(rG
, res
);
28531 goto decode_success
;
28533 /* VBROADCASTSS xmm2, ymm1 = VEX.256.66.0F38.WIG 18 /r */
28534 if (have66noF2noF3(pfx
)
28535 && 1==getVexL(pfx
)/*256*/
28536 && epartIsReg(getUChar(delta
))) {
28537 UChar modrm
= getUChar(delta
);
28538 UInt rG
= gregOfRexRM(pfx
, modrm
);
28539 UInt rE
= eregOfRexRM(pfx
, modrm
);
28540 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
28541 IRTemp t32
= newTemp(Ity_I32
);
28542 assign(t32
, getXMMRegLane32(rE
, 0));
28543 IRTemp t64
= newTemp(Ity_I64
);
28544 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
28545 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
28546 mkexpr(t64
), mkexpr(t64
));
28547 putYMMReg(rG
, res
);
28549 goto decode_success
;
28554 /* VBROADCASTSD m64, ymm1 = VEX.256.66.0F38.WIG 19 /r */
28555 if (have66noF2noF3(pfx
)
28556 && 1==getVexL(pfx
)/*256*/
28557 && !epartIsReg(getUChar(delta
))) {
28558 UChar modrm
= getUChar(delta
);
28559 UInt rG
= gregOfRexRM(pfx
, modrm
);
28560 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28562 DIP("vbroadcastsd %s,%s\n", dis_buf
, nameYMMReg(rG
));
28563 IRTemp t64
= newTemp(Ity_I64
);
28564 assign(t64
, loadLE(Ity_I64
, mkexpr(addr
)));
28565 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
28566 mkexpr(t64
), mkexpr(t64
));
28567 putYMMReg(rG
, res
);
28568 goto decode_success
;
28570 /* VBROADCASTSD xmm2, ymm1 = VEX.256.66.0F38.WIG 19 /r */
28571 if (have66noF2noF3(pfx
)
28572 && 1==getVexL(pfx
)/*256*/
28573 && epartIsReg(getUChar(delta
))) {
28574 UChar modrm
= getUChar(delta
);
28575 UInt rG
= gregOfRexRM(pfx
, modrm
);
28576 UInt rE
= eregOfRexRM(pfx
, modrm
);
28577 DIP("vbroadcastsd %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
28578 IRTemp t64
= newTemp(Ity_I64
);
28579 assign(t64
, getXMMRegLane64(rE
, 0));
28580 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
28581 mkexpr(t64
), mkexpr(t64
));
28582 putYMMReg(rG
, res
);
28584 goto decode_success
;
28589 /* VBROADCASTF128 m128, ymm1 = VEX.256.66.0F38.WIG 1A /r */
28590 if (have66noF2noF3(pfx
)
28591 && 1==getVexL(pfx
)/*256*/
28592 && !epartIsReg(getUChar(delta
))) {
28593 UChar modrm
= getUChar(delta
);
28594 UInt rG
= gregOfRexRM(pfx
, modrm
);
28595 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28597 DIP("vbroadcastf128 %s,%s\n", dis_buf
, nameYMMReg(rG
));
28598 IRTemp t128
= newTemp(Ity_V128
);
28599 assign(t128
, loadLE(Ity_V128
, mkexpr(addr
)));
28600 putYMMReg( rG
, binop(Iop_V128HLtoV256
, mkexpr(t128
), mkexpr(t128
)) );
28601 goto decode_success
;
28606 /* VPABSB xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1C /r */
28607 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28608 delta
= dis_AVX128_E_to_G_unary(
28609 uses_vvvv
, vbi
, pfx
, delta
,
28610 "vpabsb", math_PABS_XMM_pap1
);
28611 goto decode_success
;
28613 /* VPABSB ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1C /r */
28614 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28615 delta
= dis_AVX256_E_to_G_unary(
28616 uses_vvvv
, vbi
, pfx
, delta
,
28617 "vpabsb", math_PABS_YMM_pap1
);
28618 goto decode_success
;
28623 /* VPABSW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1D /r */
28624 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28625 delta
= dis_AVX128_E_to_G_unary(
28626 uses_vvvv
, vbi
, pfx
, delta
,
28627 "vpabsw", math_PABS_XMM_pap2
);
28628 goto decode_success
;
28630 /* VPABSW ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1D /r */
28631 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28632 delta
= dis_AVX256_E_to_G_unary(
28633 uses_vvvv
, vbi
, pfx
, delta
,
28634 "vpabsw", math_PABS_YMM_pap2
);
28635 goto decode_success
;
28640 /* VPABSD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1E /r */
28641 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28642 delta
= dis_AVX128_E_to_G_unary(
28643 uses_vvvv
, vbi
, pfx
, delta
,
28644 "vpabsd", math_PABS_XMM_pap4
);
28645 goto decode_success
;
28647 /* VPABSD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1E /r */
28648 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28649 delta
= dis_AVX256_E_to_G_unary(
28650 uses_vvvv
, vbi
, pfx
, delta
,
28651 "vpabsd", math_PABS_YMM_pap4
);
28652 goto decode_success
;
28657 /* VPMOVSXBW xmm2/m64, xmm1 */
28658 /* VPMOVSXBW = VEX.128.66.0F38.WIG 20 /r */
28659 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28660 delta
= dis_PMOVxXBW_128( vbi
, pfx
, delta
,
28661 True
/*isAvx*/, False
/*!xIsZ*/ );
28662 goto decode_success
;
28664 /* VPMOVSXBW xmm2/m128, ymm1 */
28665 /* VPMOVSXBW = VEX.256.66.0F38.WIG 20 /r */
28666 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28667 delta
= dis_PMOVxXBW_256( vbi
, pfx
, delta
, False
/*!xIsZ*/ );
28668 goto decode_success
;
28673 /* VPMOVSXBD xmm2/m32, xmm1 */
28674 /* VPMOVSXBD = VEX.128.66.0F38.WIG 21 /r */
28675 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28676 delta
= dis_PMOVxXBD_128( vbi
, pfx
, delta
,
28677 True
/*isAvx*/, False
/*!xIsZ*/ );
28678 goto decode_success
;
28680 /* VPMOVSXBD xmm2/m64, ymm1 */
28681 /* VPMOVSXBD = VEX.256.66.0F38.WIG 21 /r */
28682 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28683 delta
= dis_PMOVxXBD_256( vbi
, pfx
, delta
, False
/*!xIsZ*/ );
28684 goto decode_success
;
28689 /* VPMOVSXBQ xmm2/m16, xmm1 */
28690 /* VPMOVSXBQ = VEX.128.66.0F38.WIG 22 /r */
28691 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28692 delta
= dis_PMOVSXBQ_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
28693 goto decode_success
;
28695 /* VPMOVSXBQ xmm2/m32, ymm1 */
28696 /* VPMOVSXBQ = VEX.256.66.0F38.WIG 22 /r */
28697 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28698 delta
= dis_PMOVSXBQ_256( vbi
, pfx
, delta
);
28699 goto decode_success
;
28704 /* VPMOVSXWD xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 23 /r */
28705 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28706 delta
= dis_PMOVxXWD_128( vbi
, pfx
, delta
,
28707 True
/*isAvx*/, False
/*!xIsZ*/ );
28708 goto decode_success
;
28710 /* VPMOVSXWD xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 23 /r */
28711 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28712 delta
= dis_PMOVxXWD_256( vbi
, pfx
, delta
, False
/*!xIsZ*/ );
28713 goto decode_success
;
28718 /* VPMOVSXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 24 /r */
28719 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28720 delta
= dis_PMOVSXWQ_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
28721 goto decode_success
;
28723 /* VPMOVSXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 24 /r */
28724 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28725 delta
= dis_PMOVSXWQ_256( vbi
, pfx
, delta
);
28726 goto decode_success
;
28731 /* VPMOVSXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 25 /r */
28732 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28733 delta
= dis_PMOVxXDQ_128( vbi
, pfx
, delta
,
28734 True
/*isAvx*/, False
/*!xIsZ*/ );
28735 goto decode_success
;
28737 /* VPMOVSXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 25 /r */
28738 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28739 delta
= dis_PMOVxXDQ_256( vbi
, pfx
, delta
, False
/*!xIsZ*/ );
28740 goto decode_success
;
28745 /* VPMULDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 28 /r */
28746 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28747 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28748 uses_vvvv
, vbi
, pfx
, delta
,
28749 "vpmuldq", math_PMULDQ_128
);
28750 goto decode_success
;
28752 /* VPMULDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 28 /r */
28753 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28754 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28755 uses_vvvv
, vbi
, pfx
, delta
,
28756 "vpmuldq", math_PMULDQ_256
);
28757 goto decode_success
;
28762 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28763 /* VPCMPEQQ = VEX.NDS.128.66.0F38.WIG 29 /r */
28764 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28765 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28766 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpeqq", Iop_CmpEQ64x2
);
28767 goto decode_success
;
28769 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28770 /* VPCMPEQQ = VEX.NDS.256.66.0F38.WIG 29 /r */
28771 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28772 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28773 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpeqq", Iop_CmpEQ64x4
);
28774 goto decode_success
;
28779 /* VMOVNTDQA m128, xmm1 = VEX.128.66.0F38.WIG 2A /r */
28780 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
28781 && !epartIsReg(getUChar(delta
))) {
28782 UChar modrm
= getUChar(delta
);
28783 UInt rD
= gregOfRexRM(pfx
, modrm
);
28784 IRTemp tD
= newTemp(Ity_V128
);
28785 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28787 gen_SEGV_if_not_16_aligned(addr
);
28788 assign(tD
, loadLE(Ity_V128
, mkexpr(addr
)));
28789 DIP("vmovntdqa %s,%s\n", dis_buf
, nameXMMReg(rD
));
28790 putYMMRegLoAndZU(rD
, mkexpr(tD
));
28791 goto decode_success
;
28793 /* VMOVNTDQA m256, ymm1 = VEX.256.66.0F38.WIG 2A /r */
28794 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
28795 && !epartIsReg(getUChar(delta
))) {
28796 UChar modrm
= getUChar(delta
);
28797 UInt rD
= gregOfRexRM(pfx
, modrm
);
28798 IRTemp tD
= newTemp(Ity_V256
);
28799 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28801 gen_SEGV_if_not_32_aligned(addr
);
28802 assign(tD
, loadLE(Ity_V256
, mkexpr(addr
)));
28803 DIP("vmovntdqa %s,%s\n", dis_buf
, nameYMMReg(rD
));
28804 putYMMReg(rD
, mkexpr(tD
));
28805 goto decode_success
;
28810 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28811 /* VPACKUSDW = VEX.NDS.128.66.0F38.WIG 2B /r */
28812 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28813 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
28814 uses_vvvv
, vbi
, pfx
, delta
, "vpackusdw",
28815 Iop_QNarrowBin32Sto16Ux8
, NULL
,
28816 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
28817 goto decode_success
;
28819 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28820 /* VPACKUSDW = VEX.NDS.256.66.0F38.WIG 2B /r */
28821 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28822 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28823 uses_vvvv
, vbi
, pfx
, delta
, "vpackusdw",
28824 math_VPACKUSDW_YMM
);
28825 goto decode_success
;
28830 /* VMASKMOVPS m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2C /r */
28831 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
28832 && 0==getRexW(pfx
)/*W0*/
28833 && !epartIsReg(getUChar(delta
))) {
28834 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovps",
28835 /*!isYMM*/False
, Ity_I32
, /*isLoad*/True
);
28836 goto decode_success
;
28838 /* VMASKMOVPS m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2C /r */
28839 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
28840 && 0==getRexW(pfx
)/*W0*/
28841 && !epartIsReg(getUChar(delta
))) {
28842 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovps",
28843 /*isYMM*/True
, Ity_I32
, /*isLoad*/True
);
28844 goto decode_success
;
28849 /* VMASKMOVPD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2D /r */
28850 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
28851 && 0==getRexW(pfx
)/*W0*/
28852 && !epartIsReg(getUChar(delta
))) {
28853 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovpd",
28854 /*!isYMM*/False
, Ity_I64
, /*isLoad*/True
);
28855 goto decode_success
;
28857 /* VMASKMOVPD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2D /r */
28858 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
28859 && 0==getRexW(pfx
)/*W0*/
28860 && !epartIsReg(getUChar(delta
))) {
28861 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovpd",
28862 /*isYMM*/True
, Ity_I64
, /*isLoad*/True
);
28863 goto decode_success
;
28868 /* VMASKMOVPS xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2E /r */
28869 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
28870 && 0==getRexW(pfx
)/*W0*/
28871 && !epartIsReg(getUChar(delta
))) {
28872 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovps",
28873 /*!isYMM*/False
, Ity_I32
, /*!isLoad*/False
);
28874 goto decode_success
;
28876 /* VMASKMOVPS ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2E /r */
28877 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
28878 && 0==getRexW(pfx
)/*W0*/
28879 && !epartIsReg(getUChar(delta
))) {
28880 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovps",
28881 /*isYMM*/True
, Ity_I32
, /*!isLoad*/False
);
28882 goto decode_success
;
28887 /* VMASKMOVPD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2F /r */
28888 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
28889 && 0==getRexW(pfx
)/*W0*/
28890 && !epartIsReg(getUChar(delta
))) {
28891 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovpd",
28892 /*!isYMM*/False
, Ity_I64
, /*!isLoad*/False
);
28893 goto decode_success
;
28895 /* VMASKMOVPD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2F /r */
28896 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
28897 && 0==getRexW(pfx
)/*W0*/
28898 && !epartIsReg(getUChar(delta
))) {
28899 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovpd",
28900 /*isYMM*/True
, Ity_I64
, /*!isLoad*/False
);
28901 goto decode_success
;
28906 /* VPMOVZXBW xmm2/m64, xmm1 */
28907 /* VPMOVZXBW = VEX.128.66.0F38.WIG 30 /r */
28908 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28909 delta
= dis_PMOVxXBW_128( vbi
, pfx
, delta
,
28910 True
/*isAvx*/, True
/*xIsZ*/ );
28911 goto decode_success
;
28913 /* VPMOVZXBW xmm2/m128, ymm1 */
28914 /* VPMOVZXBW = VEX.256.66.0F38.WIG 30 /r */
28915 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28916 delta
= dis_PMOVxXBW_256( vbi
, pfx
, delta
, True
/*xIsZ*/ );
28917 goto decode_success
;
28922 /* VPMOVZXBD xmm2/m32, xmm1 */
28923 /* VPMOVZXBD = VEX.128.66.0F38.WIG 31 /r */
28924 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28925 delta
= dis_PMOVxXBD_128( vbi
, pfx
, delta
,
28926 True
/*isAvx*/, True
/*xIsZ*/ );
28927 goto decode_success
;
28929 /* VPMOVZXBD xmm2/m64, ymm1 */
28930 /* VPMOVZXBD = VEX.256.66.0F38.WIG 31 /r */
28931 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28932 delta
= dis_PMOVxXBD_256( vbi
, pfx
, delta
, True
/*xIsZ*/ );
28933 goto decode_success
;
28938 /* VPMOVZXBQ xmm2/m16, xmm1 */
28939 /* VPMOVZXBQ = VEX.128.66.0F38.WIG 32 /r */
28940 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28941 delta
= dis_PMOVZXBQ_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
28942 goto decode_success
;
28944 /* VPMOVZXBQ xmm2/m32, ymm1 */
28945 /* VPMOVZXBQ = VEX.256.66.0F38.WIG 32 /r */
28946 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28947 delta
= dis_PMOVZXBQ_256( vbi
, pfx
, delta
);
28948 goto decode_success
;
28953 /* VPMOVZXWD xmm2/m64, xmm1 */
28954 /* VPMOVZXWD = VEX.128.66.0F38.WIG 33 /r */
28955 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28956 delta
= dis_PMOVxXWD_128( vbi
, pfx
, delta
,
28957 True
/*isAvx*/, True
/*xIsZ*/ );
28958 goto decode_success
;
28960 /* VPMOVZXWD xmm2/m128, ymm1 */
28961 /* VPMOVZXWD = VEX.256.66.0F38.WIG 33 /r */
28962 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28963 delta
= dis_PMOVxXWD_256( vbi
, pfx
, delta
, True
/*xIsZ*/ );
28964 goto decode_success
;
28969 /* VPMOVZXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 34 /r */
28970 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28971 delta
= dis_PMOVZXWQ_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
28972 goto decode_success
;
28974 /* VPMOVZXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 34 /r */
28975 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28976 delta
= dis_PMOVZXWQ_256( vbi
, pfx
, delta
);
28977 goto decode_success
;
28982 /* VPMOVZXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 35 /r */
28983 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28984 delta
= dis_PMOVxXDQ_128( vbi
, pfx
, delta
,
28985 True
/*isAvx*/, True
/*xIsZ*/ );
28986 goto decode_success
;
28988 /* VPMOVZXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 35 /r */
28989 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28990 delta
= dis_PMOVxXDQ_256( vbi
, pfx
, delta
, True
/*xIsZ*/ );
28991 goto decode_success
;
28996 /* VPERMD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 36 /r */
28997 if (have66noF2noF3(pfx
)
28998 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
28999 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
29000 uses_vvvv
, vbi
, pfx
, delta
, "vpermd", math_VPERMD
);
29001 goto decode_success
;
29006 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
29007 /* VPCMPGTQ = VEX.NDS.128.66.0F38.WIG 37 /r */
29008 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29009 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29010 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtq", Iop_CmpGT64Sx2
);
29011 goto decode_success
;
29013 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
29014 /* VPCMPGTQ = VEX.NDS.256.66.0F38.WIG 37 /r */
29015 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29016 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29017 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtq", Iop_CmpGT64Sx4
);
29018 goto decode_success
;
29023 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
29024 /* VPMINSB = VEX.NDS.128.66.0F38.WIG 38 /r */
29025 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29026 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29027 uses_vvvv
, vbi
, pfx
, delta
, "vpminsb", Iop_Min8Sx16
);
29028 goto decode_success
;
29030 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
29031 /* VPMINSB = VEX.NDS.256.66.0F38.WIG 38 /r */
29032 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29033 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29034 uses_vvvv
, vbi
, pfx
, delta
, "vpminsb", Iop_Min8Sx32
);
29035 goto decode_success
;
29040 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
29041 /* VPMINSD = VEX.NDS.128.66.0F38.WIG 39 /r */
29042 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29043 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29044 uses_vvvv
, vbi
, pfx
, delta
, "vpminsd", Iop_Min32Sx4
);
29045 goto decode_success
;
29047 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
29048 /* VPMINSD = VEX.NDS.256.66.0F38.WIG 39 /r */
29049 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29050 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29051 uses_vvvv
, vbi
, pfx
, delta
, "vpminsd", Iop_Min32Sx8
);
29052 goto decode_success
;
29057 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
29058 /* VPMINUW = VEX.NDS.128.66.0F38.WIG 3A /r */
29059 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29060 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29061 uses_vvvv
, vbi
, pfx
, delta
, "vpminuw", Iop_Min16Ux8
);
29062 goto decode_success
;
29064 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
29065 /* VPMINUW = VEX.NDS.256.66.0F38.WIG 3A /r */
29066 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29067 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29068 uses_vvvv
, vbi
, pfx
, delta
, "vpminuw", Iop_Min16Ux16
);
29069 goto decode_success
;
29074 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
29075 /* VPMINUD = VEX.NDS.128.66.0F38.WIG 3B /r */
29076 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29077 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29078 uses_vvvv
, vbi
, pfx
, delta
, "vpminud", Iop_Min32Ux4
);
29079 goto decode_success
;
29081 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
29082 /* VPMINUD = VEX.NDS.256.66.0F38.WIG 3B /r */
29083 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29084 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29085 uses_vvvv
, vbi
, pfx
, delta
, "vpminud", Iop_Min32Ux8
);
29086 goto decode_success
;
29091 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
29092 /* VPMAXSB = VEX.NDS.128.66.0F38.WIG 3C /r */
29093 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29094 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29095 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxsb", Iop_Max8Sx16
);
29096 goto decode_success
;
29098 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
29099 /* VPMAXSB = VEX.NDS.256.66.0F38.WIG 3C /r */
29100 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29101 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29102 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxsb", Iop_Max8Sx32
);
29103 goto decode_success
;
29108 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
29109 /* VPMAXSD = VEX.NDS.128.66.0F38.WIG 3D /r */
29110 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29111 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29112 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxsd", Iop_Max32Sx4
);
29113 goto decode_success
;
29115 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
29116 /* VPMAXSD = VEX.NDS.256.66.0F38.WIG 3D /r */
29117 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29118 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29119 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxsd", Iop_Max32Sx8
);
29120 goto decode_success
;
29125 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
29126 /* VPMAXUW = VEX.NDS.128.66.0F38.WIG 3E /r */
29127 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29128 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29129 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxuw", Iop_Max16Ux8
);
29130 goto decode_success
;
29132 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
29133 /* VPMAXUW = VEX.NDS.256.66.0F38.WIG 3E /r */
29134 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29135 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29136 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxuw", Iop_Max16Ux16
);
29137 goto decode_success
;
29142 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
29143 /* VPMAXUD = VEX.NDS.128.66.0F38.WIG 3F /r */
29144 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29145 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29146 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxud", Iop_Max32Ux4
);
29147 goto decode_success
;
29149 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
29150 /* VPMAXUD = VEX.NDS.256.66.0F38.WIG 3F /r */
29151 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29152 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29153 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxud", Iop_Max32Ux8
);
29154 goto decode_success
;
29159 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
29160 /* VPMULLD = VEX.NDS.128.66.0F38.WIG 40 /r */
29161 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29162 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29163 uses_vvvv
, vbi
, pfx
, delta
, "vpmulld", Iop_Mul32x4
);
29164 goto decode_success
;
29166 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
29167 /* VPMULLD = VEX.NDS.256.66.0F38.WIG 40 /r */
29168 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29169 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29170 uses_vvvv
, vbi
, pfx
, delta
, "vpmulld", Iop_Mul32x8
);
29171 goto decode_success
;
29176 /* VPHMINPOSUW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 41 /r */
29177 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29178 delta
= dis_PHMINPOSUW_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
29179 goto decode_success
;
29184 /* VPSRLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 45 /r */
29185 /* VPSRLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 45 /r */
29186 if (have66noF2noF3(pfx
) && 0==getRexW(pfx
)/*W0*/) {
29187 delta
= dis_AVX_var_shiftV_byE( vbi
, pfx
, delta
, "vpsrlvd",
29188 Iop_Shr32
, 1==getVexL(pfx
) );
29190 goto decode_success
;
29192 /* VPSRLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 45 /r */
29193 /* VPSRLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 45 /r */
29194 if (have66noF2noF3(pfx
) && 1==getRexW(pfx
)/*W1*/) {
29195 delta
= dis_AVX_var_shiftV_byE( vbi
, pfx
, delta
, "vpsrlvq",
29196 Iop_Shr64
, 1==getVexL(pfx
) );
29198 goto decode_success
;
29203 /* VPSRAVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 46 /r */
29204 /* VPSRAVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 46 /r */
29205 if (have66noF2noF3(pfx
) && 0==getRexW(pfx
)/*W0*/) {
29206 delta
= dis_AVX_var_shiftV_byE( vbi
, pfx
, delta
, "vpsravd",
29207 Iop_Sar32
, 1==getVexL(pfx
) );
29209 goto decode_success
;
29214 /* VPSLLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 47 /r */
29215 /* VPSLLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 47 /r */
29216 if (have66noF2noF3(pfx
) && 0==getRexW(pfx
)/*W0*/) {
29217 delta
= dis_AVX_var_shiftV_byE( vbi
, pfx
, delta
, "vpsllvd",
29218 Iop_Shl32
, 1==getVexL(pfx
) );
29220 goto decode_success
;
29222 /* VPSLLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 47 /r */
29223 /* VPSLLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 47 /r */
29224 if (have66noF2noF3(pfx
) && 1==getRexW(pfx
)/*W1*/) {
29225 delta
= dis_AVX_var_shiftV_byE( vbi
, pfx
, delta
, "vpsllvq",
29226 Iop_Shl64
, 1==getVexL(pfx
) );
29228 goto decode_success
;
29233 /* VPBROADCASTD xmm2/m32, xmm1 = VEX.128.66.0F38.W0 58 /r */
29234 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29235 && 0==getRexW(pfx
)/*W0*/) {
29236 UChar modrm
= getUChar(delta
);
29237 UInt rG
= gregOfRexRM(pfx
, modrm
);
29238 IRTemp t32
= newTemp(Ity_I32
);
29239 if (epartIsReg(modrm
)) {
29240 UInt rE
= eregOfRexRM(pfx
, modrm
);
29242 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
29243 assign(t32
, getXMMRegLane32(rE
, 0));
29245 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29247 DIP("vpbroadcastd %s,%s\n", dis_buf
, nameXMMReg(rG
));
29248 assign(t32
, loadLE(Ity_I32
, mkexpr(addr
)));
29250 IRTemp t64
= newTemp(Ity_I64
);
29251 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
29252 IRExpr
* res
= binop(Iop_64HLtoV128
, mkexpr(t64
), mkexpr(t64
));
29253 putYMMRegLoAndZU(rG
, res
);
29254 goto decode_success
;
29256 /* VPBROADCASTD xmm2/m32, ymm1 = VEX.256.66.0F38.W0 58 /r */
29257 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29258 && 0==getRexW(pfx
)/*W0*/) {
29259 UChar modrm
= getUChar(delta
);
29260 UInt rG
= gregOfRexRM(pfx
, modrm
);
29261 IRTemp t32
= newTemp(Ity_I32
);
29262 if (epartIsReg(modrm
)) {
29263 UInt rE
= eregOfRexRM(pfx
, modrm
);
29265 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
29266 assign(t32
, getXMMRegLane32(rE
, 0));
29268 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29270 DIP("vpbroadcastd %s,%s\n", dis_buf
, nameYMMReg(rG
));
29271 assign(t32
, loadLE(Ity_I32
, mkexpr(addr
)));
29273 IRTemp t64
= newTemp(Ity_I64
);
29274 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
29275 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
29276 mkexpr(t64
), mkexpr(t64
));
29277 putYMMReg(rG
, res
);
29278 goto decode_success
;
29283 /* VPBROADCASTQ xmm2/m64, xmm1 = VEX.128.66.0F38.W0 59 /r */
29284 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29285 && 0==getRexW(pfx
)/*W0*/) {
29286 UChar modrm
= getUChar(delta
);
29287 UInt rG
= gregOfRexRM(pfx
, modrm
);
29288 IRTemp t64
= newTemp(Ity_I64
);
29289 if (epartIsReg(modrm
)) {
29290 UInt rE
= eregOfRexRM(pfx
, modrm
);
29292 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
29293 assign(t64
, getXMMRegLane64(rE
, 0));
29295 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29297 DIP("vpbroadcastq %s,%s\n", dis_buf
, nameXMMReg(rG
));
29298 assign(t64
, loadLE(Ity_I64
, mkexpr(addr
)));
29300 IRExpr
* res
= binop(Iop_64HLtoV128
, mkexpr(t64
), mkexpr(t64
));
29301 putYMMRegLoAndZU(rG
, res
);
29302 goto decode_success
;
29304 /* VPBROADCASTQ xmm2/m64, ymm1 = VEX.256.66.0F38.W0 59 /r */
29305 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29306 && 0==getRexW(pfx
)/*W0*/) {
29307 UChar modrm
= getUChar(delta
);
29308 UInt rG
= gregOfRexRM(pfx
, modrm
);
29309 IRTemp t64
= newTemp(Ity_I64
);
29310 if (epartIsReg(modrm
)) {
29311 UInt rE
= eregOfRexRM(pfx
, modrm
);
29313 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
29314 assign(t64
, getXMMRegLane64(rE
, 0));
29316 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29318 DIP("vpbroadcastq %s,%s\n", dis_buf
, nameYMMReg(rG
));
29319 assign(t64
, loadLE(Ity_I64
, mkexpr(addr
)));
29321 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
29322 mkexpr(t64
), mkexpr(t64
));
29323 putYMMReg(rG
, res
);
29324 goto decode_success
;
29329 /* VBROADCASTI128 m128, ymm1 = VEX.256.66.0F38.WIG 5A /r */
29330 if (have66noF2noF3(pfx
)
29331 && 1==getVexL(pfx
)/*256*/
29332 && !epartIsReg(getUChar(delta
))) {
29333 UChar modrm
= getUChar(delta
);
29334 UInt rG
= gregOfRexRM(pfx
, modrm
);
29335 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29337 DIP("vbroadcasti128 %s,%s\n", dis_buf
, nameYMMReg(rG
));
29338 IRTemp t128
= newTemp(Ity_V128
);
29339 assign(t128
, loadLE(Ity_V128
, mkexpr(addr
)));
29340 putYMMReg( rG
, binop(Iop_V128HLtoV256
, mkexpr(t128
), mkexpr(t128
)) );
29341 goto decode_success
;
29346 /* VPBROADCASTB xmm2/m8, xmm1 = VEX.128.66.0F38.W0 78 /r */
29347 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29348 && 0==getRexW(pfx
)/*W0*/) {
29349 UChar modrm
= getUChar(delta
);
29350 UInt rG
= gregOfRexRM(pfx
, modrm
);
29351 IRTemp t8
= newTemp(Ity_I8
);
29352 if (epartIsReg(modrm
)) {
29353 UInt rE
= eregOfRexRM(pfx
, modrm
);
29355 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
29356 assign(t8
, unop(Iop_32to8
, getXMMRegLane32(rE
, 0)));
29358 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29360 DIP("vpbroadcastb %s,%s\n", dis_buf
, nameXMMReg(rG
));
29361 assign(t8
, loadLE(Ity_I8
, mkexpr(addr
)));
29363 IRTemp t16
= newTemp(Ity_I16
);
29364 assign(t16
, binop(Iop_8HLto16
, mkexpr(t8
), mkexpr(t8
)));
29365 IRTemp t32
= newTemp(Ity_I32
);
29366 assign(t32
, binop(Iop_16HLto32
, mkexpr(t16
), mkexpr(t16
)));
29367 IRTemp t64
= newTemp(Ity_I64
);
29368 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
29369 IRExpr
* res
= binop(Iop_64HLtoV128
, mkexpr(t64
), mkexpr(t64
));
29370 putYMMRegLoAndZU(rG
, res
);
29371 goto decode_success
;
      /* VPBROADCASTB xmm2/m8, ymm1 = VEX.256.66.0F38.W0 78 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0==getRexW(pfx)/*W0*/) {
         UChar modrm = getUChar(delta);
         UInt   rG   = gregOfRexRM(pfx, modrm);
         IRTemp t8   = newTemp(Ity_I8);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta++;
            DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
            assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            DIP("vpbroadcastb %s,%s\n", dis_buf, nameYMMReg(rG));
            assign(t8, loadLE(Ity_I8, mkexpr(addr)));
         }
         IRTemp t16 = newTemp(Ity_I16);
         assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
         IRTemp t32 = newTemp(Ity_I32);
         assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
         IRTemp t64 = newTemp(Ity_I64);
         assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
         IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
                                  mkexpr(t64), mkexpr(t64));
         putYMMReg(rG, res);
         goto decode_success;
      }
      break;
   case 0x79:
      /* VPBROADCASTW xmm2/m16, xmm1 = VEX.128.66.0F38.W0 79 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0==getRexW(pfx)/*W0*/) {
         UChar modrm = getUChar(delta);
         UInt   rG   = gregOfRexRM(pfx, modrm);
         IRTemp t16  = newTemp(Ity_I16);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta++;
            DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
            assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            DIP("vpbroadcastw %s,%s\n", dis_buf, nameXMMReg(rG));
            assign(t16, loadLE(Ity_I16, mkexpr(addr)));
         }
         IRTemp t32 = newTemp(Ity_I32);
         assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
         IRTemp t64 = newTemp(Ity_I64);
         assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
         IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
         putYMMRegLoAndZU(rG, res);
         goto decode_success;
      }
      /* VPBROADCASTW xmm2/m16, ymm1 = VEX.256.66.0F38.W0 79 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0==getRexW(pfx)/*W0*/) {
         UChar modrm = getUChar(delta);
         UInt   rG   = gregOfRexRM(pfx, modrm);
         IRTemp t16  = newTemp(Ity_I16);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            delta++;
            DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
            assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
            delta += alen;
            DIP("vpbroadcastw %s,%s\n", dis_buf, nameYMMReg(rG));
            assign(t16, loadLE(Ity_I16, mkexpr(addr)));
         }
         IRTemp t32 = newTemp(Ity_I32);
         assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
         IRTemp t64 = newTemp(Ity_I64);
         assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
         IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
                                  mkexpr(t64), mkexpr(t64));
         putYMMReg(rG, res);
         goto decode_success;
      }
      break;
   case 0x8C:
      /* VPMASKMOVD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 8C /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
                               /*!isYMM*/False, Ity_I32, /*isLoad*/True );
         goto decode_success;
      }
      /* VPMASKMOVD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 8C /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
                               /*isYMM*/True, Ity_I32, /*isLoad*/True );
         goto decode_success;
      }
      /* VPMASKMOVQ m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 8C /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
                               /*!isYMM*/False, Ity_I64, /*isLoad*/True );
         goto decode_success;
      }
      /* VPMASKMOVQ m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 8C /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
                               /*isYMM*/True, Ity_I64, /*isLoad*/True );
         goto decode_success;
      }
      break;
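      /* For both the 0x8C (load) and 0x8E (store) forms all of the real
         work is done by dis_VMASKMOV: the Bool selects xmm vs ymm width,
         the IRType (Ity_I32 vs Ity_I64) selects dword vs qword mask
         lanes, and isLoad distinguishes masked load from masked store.
         Only memory operands are accepted, hence the !epartIsReg guards. */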
   case 0x8E:
      /* VPMASKMOVD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 8E /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
                               /*!isYMM*/False, Ity_I32, /*!isLoad*/False );
         goto decode_success;
      }
      /* VPMASKMOVD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 8E /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
                               /*isYMM*/True, Ity_I32, /*!isLoad*/False );
         goto decode_success;
      }
      /* VPMASKMOVQ xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W1 8E /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
                               /*!isYMM*/False, Ity_I64, /*!isLoad*/False );
         goto decode_success;
      }
      /* VPMASKMOVQ ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W1 8E /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
                               /*isYMM*/True, Ity_I64, /*!isLoad*/False );
         goto decode_success;
      }
      break;
   case 0x90:
      /* VPGATHERDD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 90 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd",
                              /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VPGATHERDD ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 90 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd",
                              /*isYMM*/True, /*!isVM64x*/False, Ity_I32 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VPGATHERDQ xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 90 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq",
                              /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VPGATHERDQ ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 90 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq",
                              /*isYMM*/True, /*!isVM64x*/False, Ity_I64 );
         if (delta != delta0)
            goto decode_success;
      }
      break;

   case 0x91:
      /* VPGATHERQD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 91 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd",
                              /*!isYMM*/False, /*isVM64x*/True, Ity_I32 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VPGATHERQD xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 91 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd",
                              /*isYMM*/True, /*isVM64x*/True, Ity_I32 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VPGATHERQQ xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 91 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq",
                              /*!isYMM*/False, /*isVM64x*/True, Ity_I64 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VPGATHERQQ ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 91 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq",
                              /*isYMM*/True, /*isVM64x*/True, Ity_I64 );
         if (delta != delta0)
            goto decode_success;
      }
      break;
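      /* dis_VGATHER returns its delta argument unchanged when it refuses
         the encoding, so each gather case only jumps to decode_success
         when delta has actually advanced; otherwise control falls out of
         the case and the instruction is treated as undecodable. */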
   case 0x92:
      /* VGATHERDPS xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 92 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps",
                              /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VGATHERDPS ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 92 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps",
                              /*isYMM*/True, /*!isVM64x*/False, Ity_I32 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VGATHERDPD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 92 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd",
                              /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VGATHERDPD ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 92 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd",
                              /*isYMM*/True, /*!isVM64x*/False, Ity_I64 );
         if (delta != delta0)
            goto decode_success;
      }
      break;

   case 0x93:
      /* VGATHERQPS xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 93 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps",
                              /*!isYMM*/False, /*isVM64x*/True, Ity_I32 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VGATHERQPS xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 93 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps",
                              /*isYMM*/True, /*isVM64x*/True, Ity_I32 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VGATHERQPD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 93 /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
          && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd",
                              /*!isYMM*/False, /*isVM64x*/True, Ity_I64 );
         if (delta != delta0)
            goto decode_success;
      }
      /* VGATHERQPD ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 93 /r */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
          && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
         Long delta0 = delta;
         delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd",
                              /*isYMM*/True, /*isVM64x*/True, Ity_I64 );
         if (delta != delta0)
            goto decode_success;
      }
      break;
   case 0x96 ... 0x9F:
   case 0xA6 ... 0xAF:
   case 0xB6 ... 0xBF:
29678 /* VFMADDSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 96 /r */
29679 /* VFMADDSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 96 /r */
29680 /* VFMADDSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 96 /r */
29681 /* VFMADDSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 96 /r */
29682 /* VFMSUBADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 97 /r */
29683 /* VFMSUBADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 97 /r */
29684 /* VFMSUBADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 97 /r */
29685 /* VFMSUBADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 97 /r */
29686 /* VFMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 98 /r */
29687 /* VFMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 98 /r */
29688 /* VFMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 98 /r */
29689 /* VFMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 98 /r */
29690 /* VFMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 99 /r */
29691 /* VFMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 99 /r */
29692 /* VFMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9A /r */
29693 /* VFMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9A /r */
29694 /* VFMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9A /r */
29695 /* VFMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9A /r */
29696 /* VFMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9B /r */
29697 /* VFMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9B /r */
29698 /* VFNMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9C /r */
29699 /* VFNMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9C /r */
29700 /* VFNMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9C /r */
29701 /* VFNMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9C /r */
29702 /* VFNMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9D /r */
29703 /* VFNMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9D /r */
29704 /* VFNMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9E /r */
29705 /* VFNMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9E /r */
29706 /* VFNMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9E /r */
29707 /* VFNMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9E /r */
29708 /* VFNMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9F /r */
29709 /* VFNMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9F /r */
29710 /* VFMADDSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A6 /r */
29711 /* VFMADDSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A6 /r */
29712 /* VFMADDSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A6 /r */
29713 /* VFMADDSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A6 /r */
29714 /* VFMSUBADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A7 /r */
29715 /* VFMSUBADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A7 /r */
29716 /* VFMSUBADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A7 /r */
29717 /* VFMSUBADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A7 /r */
29718 /* VFMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A8 /r */
29719 /* VFMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A8 /r */
29720 /* VFMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A8 /r */
29721 /* VFMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A8 /r */
29722 /* VFMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 A9 /r */
29723 /* VFMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 A9 /r */
29724 /* VFMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AA /r */
29725 /* VFMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AA /r */
29726 /* VFMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AA /r */
29727 /* VFMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AA /r */
29728 /* VFMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AB /r */
29729 /* VFMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AB /r */
29730 /* VFNMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AC /r */
29731 /* VFNMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AC /r */
29732 /* VFNMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AC /r */
29733 /* VFNMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AC /r */
29734 /* VFNMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AD /r */
29735 /* VFNMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AD /r */
29736 /* VFNMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AE /r */
29737 /* VFNMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AE /r */
29738 /* VFNMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AE /r */
29739 /* VFNMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AE /r */
29740 /* VFNMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AF /r */
29741 /* VFNMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AF /r */
29742 /* VFMADDSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B6 /r */
29743 /* VFMADDSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B6 /r */
29744 /* VFMADDSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B6 /r */
29745 /* VFMADDSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B6 /r */
29746 /* VFMSUBADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B7 /r */
29747 /* VFMSUBADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B7 /r */
29748 /* VFMSUBADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B7 /r */
29749 /* VFMSUBADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B7 /r */
29750 /* VFMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B8 /r */
29751 /* VFMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B8 /r */
29752 /* VFMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B8 /r */
29753 /* VFMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B8 /r */
29754 /* VFMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 B9 /r */
29755 /* VFMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 B9 /r */
29756 /* VFMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BA /r */
29757 /* VFMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BA /r */
29758 /* VFMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BA /r */
29759 /* VFMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BA /r */
29760 /* VFMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BB /r */
29761 /* VFMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BB /r */
29762 /* VFNMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BC /r */
29763 /* VFNMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BC /r */
29764 /* VFNMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BC /r */
29765 /* VFNMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BC /r */
29766 /* VFNMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BD /r */
29767 /* VFNMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BD /r */
29768 /* VFNMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BE /r */
29769 /* VFNMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BE /r */
29770 /* VFNMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BE /r */
29771 /* VFNMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BE /r */
29772 /* VFNMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BF /r */
29773 /* VFNMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BF /r */
      if (have66noF2noF3(pfx)) {
         delta = dis_FMA( vbi, pfx, delta, opc );
         *uses_vvvv = True;
         dres->hint = Dis_HintVerbose;
         goto decode_success;
      }
      break;
   case 0xDB:
   case 0xDC:
   case 0xDD:
   case 0xDE:
   case 0xDF:
      /* VAESIMC xmm2/m128, xmm1 = VEX.128.66.0F38.WIG DB /r */
      /* VAESENC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DC /r */
      /* VAESENCLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DD /r */
      /* VAESDEC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DE /r */
      /* VAESDECLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DF /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AESx( vbi, pfx, delta, True/*!isAvx*/, opc );
         if (opc != 0xDB) *uses_vvvv = True;
         goto decode_success;
      }
      break;
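      /* 0xDB (VAESIMC) is the only two-operand form in this group, so it
         is also the only one that leaves *uses_vvvv as False; the other
         four read a second source register from VEX.vvvv. */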
   case 0xF2:
      /* ANDN r/m32, r32b, r32a = VEX.NDS.LZ.0F38.W0 F2 /r */
      /* ANDN r/m64, r64b, r64a = VEX.NDS.LZ.0F38.W1 F2 /r */
      if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
         Int    size = getRexW(pfx) ? 8 : 4;
         IRType ty   = szToITy(size);
         IRTemp dst  = newTemp(ty);
         IRTemp src1 = newTemp(ty);
         IRTemp src2 = newTemp(ty);
         UChar  rm   = getUChar(delta);

         assign( src1, getIRegV(size,pfx) );
         if (epartIsReg(rm)) {
            assign( src2, getIRegE(size,pfx,rm) );
            DIP("andn %s,%s,%s\n", nameIRegE(size,pfx,rm),
                nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
            delta++;
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            assign( src2, loadLE(ty, mkexpr(addr)) );
            DIP("andn %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
                nameIRegG(size,pfx,rm));
            delta += alen;
         }

         assign( dst, binop( mkSizedOp(ty,Iop_And8),
                             unop( mkSizedOp(ty,Iop_Not8), mkexpr(src1) ),
                             mkexpr(src2) ) );
         putIRegG( size, pfx, rm, mkexpr(dst) );
         stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(size == 8
                                               ? AMD64G_CC_OP_ANDN64
                                               : AMD64G_CC_OP_ANDN32)) );
         stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
         *uses_vvvv = True;
         goto decode_success;
      }
      break;
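      /* Like the other BMI cases below, ANDN describes its flags result
         via the lazy condition-code thunk: CC_OP records which operation
         ran (ANDN32 vs ANDN64), CC_DEP1 carries the 64-bit-widened
         result, and CC_DEP2 is unused (zero).  The guest-state helper
         recomputes the individual rflags bits from that triple only when
         they are actually consumed. */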
29839 /* BLSI r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /3 */
29840 /* BLSI r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /3 */
29841 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/
29842 && !haveREX(pfx
) && gregLO3ofRM(getUChar(delta
)) == 3) {
29843 Int size
= getRexW(pfx
) ? 8 : 4;
29844 IRType ty
= szToITy(size
);
29845 IRTemp src
= newTemp(ty
);
29846 IRTemp dst
= newTemp(ty
);
29847 UChar rm
= getUChar(delta
);
29849 if (epartIsReg(rm
)) {
29850 assign( src
, getIRegE(size
,pfx
,rm
) );
29851 DIP("blsi %s,%s\n", nameIRegE(size
,pfx
,rm
),
29852 nameIRegV(size
,pfx
));
29855 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29856 assign( src
, loadLE(ty
, mkexpr(addr
)) );
29857 DIP("blsi %s,%s\n", dis_buf
, nameIRegV(size
,pfx
));
29861 assign( dst
, binop(mkSizedOp(ty
,Iop_And8
),
29862 binop(mkSizedOp(ty
,Iop_Sub8
), mkU(ty
, 0),
29863 mkexpr(src
)), mkexpr(src
)) );
29864 putIRegV( size
, pfx
, mkexpr(dst
) );
29865 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(size
== 8
29866 ? AMD64G_CC_OP_BLSI64
29867 : AMD64G_CC_OP_BLSI32
)) );
29868 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dst
))) );
29869 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(mkexpr(src
))) );
29871 goto decode_success
;
29873 /* BLSMSK r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /2 */
29874 /* BLSMSK r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /2 */
29875 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/
29876 && !haveREX(pfx
) && gregLO3ofRM(getUChar(delta
)) == 2) {
29877 Int size
= getRexW(pfx
) ? 8 : 4;
29878 IRType ty
= szToITy(size
);
29879 IRTemp src
= newTemp(ty
);
29880 IRTemp dst
= newTemp(ty
);
29881 UChar rm
= getUChar(delta
);
29883 if (epartIsReg(rm
)) {
29884 assign( src
, getIRegE(size
,pfx
,rm
) );
29885 DIP("blsmsk %s,%s\n", nameIRegE(size
,pfx
,rm
),
29886 nameIRegV(size
,pfx
));
29889 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29890 assign( src
, loadLE(ty
, mkexpr(addr
)) );
29891 DIP("blsmsk %s,%s\n", dis_buf
, nameIRegV(size
,pfx
));
29895 assign( dst
, binop(mkSizedOp(ty
,Iop_Xor8
),
29896 binop(mkSizedOp(ty
,Iop_Sub8
), mkexpr(src
),
29897 mkU(ty
, 1)), mkexpr(src
)) );
29898 putIRegV( size
, pfx
, mkexpr(dst
) );
29899 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(size
== 8
29900 ? AMD64G_CC_OP_BLSMSK64
29901 : AMD64G_CC_OP_BLSMSK32
)) );
29902 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dst
))) );
29903 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(mkexpr(src
))) );
29905 goto decode_success
;
29907 /* BLSR r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /1 */
29908 /* BLSR r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /1 */
29909 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/
29910 && !haveREX(pfx
) && gregLO3ofRM(getUChar(delta
)) == 1) {
29911 Int size
= getRexW(pfx
) ? 8 : 4;
29912 IRType ty
= szToITy(size
);
29913 IRTemp src
= newTemp(ty
);
29914 IRTemp dst
= newTemp(ty
);
29915 UChar rm
= getUChar(delta
);
29917 if (epartIsReg(rm
)) {
29918 assign( src
, getIRegE(size
,pfx
,rm
) );
29919 DIP("blsr %s,%s\n", nameIRegE(size
,pfx
,rm
),
29920 nameIRegV(size
,pfx
));
29923 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29924 assign( src
, loadLE(ty
, mkexpr(addr
)) );
29925 DIP("blsr %s,%s\n", dis_buf
, nameIRegV(size
,pfx
));
29929 assign( dst
, binop(mkSizedOp(ty
,Iop_And8
),
29930 binop(mkSizedOp(ty
,Iop_Sub8
), mkexpr(src
),
29931 mkU(ty
, 1)), mkexpr(src
)) );
29932 putIRegV( size
, pfx
, mkexpr(dst
) );
29933 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(size
== 8
29934 ? AMD64G_CC_OP_BLSR64
29935 : AMD64G_CC_OP_BLSR32
)) );
29936 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dst
))) );
29937 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(mkexpr(src
))) );
29939 goto decode_success
;
29944 /* BZHI r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F5 /r */
29945 /* BZHI r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F5 /r */
29946 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
29947 Int size
= getRexW(pfx
) ? 8 : 4;
29948 IRType ty
= szToITy(size
);
29949 IRTemp dst
= newTemp(ty
);
29950 IRTemp src1
= newTemp(ty
);
29951 IRTemp src2
= newTemp(ty
);
29952 IRTemp start
= newTemp(Ity_I8
);
29953 IRTemp cond
= newTemp(Ity_I1
);
29954 UChar rm
= getUChar(delta
);
29956 assign( src2
, getIRegV(size
,pfx
) );
29957 if (epartIsReg(rm
)) {
29958 assign( src1
, getIRegE(size
,pfx
,rm
) );
29959 DIP("bzhi %s,%s,%s\n", nameIRegV(size
,pfx
),
29960 nameIRegE(size
,pfx
,rm
), nameIRegG(size
,pfx
,rm
));
29963 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29964 assign( src1
, loadLE(ty
, mkexpr(addr
)) );
29965 DIP("bzhi %s,%s,%s\n", nameIRegV(size
,pfx
), dis_buf
,
29966 nameIRegG(size
,pfx
,rm
));
29970 assign( start
, narrowTo( Ity_I8
, mkexpr(src2
) ) );
29971 assign( cond
, binop(Iop_CmpLT32U
,
29972 unop(Iop_8Uto32
, mkexpr(start
)),
29974 /* if (start < opsize) {
29978 dst = (src1 << (opsize-start)) u>> (opsize-start);
29986 binop(Iop_CmpEQ8
, mkexpr(start
), mkU8(0)),
29989 mkSizedOp(ty
,Iop_Shr8
),
29991 mkSizedOp(ty
,Iop_Shl8
),
29993 binop(Iop_Sub8
, mkU8(8*size
), mkexpr(start
))
29995 binop(Iop_Sub8
, mkU8(8*size
), mkexpr(start
))
30001 putIRegG( size
, pfx
, rm
, mkexpr(dst
) );
30002 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(size
== 8
30003 ? AMD64G_CC_OP_BLSR64
30004 : AMD64G_CC_OP_BLSR32
)) );
30005 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dst
))) );
30006 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(mkexpr(cond
))) );
30008 goto decode_success
;
30010 /* PDEP r/m32, r32b, r32a = VEX.NDS.LZ.F2.0F38.W0 F5 /r */
30011 /* PDEP r/m64, r64b, r64a = VEX.NDS.LZ.F2.0F38.W1 F5 /r */
30012 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30013 Int size
= getRexW(pfx
) ? 8 : 4;
30014 IRType ty
= szToITy(size
);
30015 IRTemp src
= newTemp(ty
);
30016 IRTemp mask
= newTemp(ty
);
30017 UChar rm
= getUChar(delta
);
30019 assign( src
, getIRegV(size
,pfx
) );
30020 if (epartIsReg(rm
)) {
30021 assign( mask
, getIRegE(size
,pfx
,rm
) );
30022 DIP("pdep %s,%s,%s\n", nameIRegE(size
,pfx
,rm
),
30023 nameIRegV(size
,pfx
), nameIRegG(size
,pfx
,rm
));
30026 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
30027 assign( mask
, loadLE(ty
, mkexpr(addr
)) );
30028 DIP("pdep %s,%s,%s\n", dis_buf
, nameIRegV(size
,pfx
),
30029 nameIRegG(size
,pfx
,rm
));
30033 IRExpr
** args
= mkIRExprVec_2( widenUto64(mkexpr(src
)),
30034 widenUto64(mkexpr(mask
)) );
30035 putIRegG( size
, pfx
, rm
,
30036 narrowTo(ty
, mkIRExprCCall(Ity_I64
, 0/*regparms*/,
30037 "amd64g_calculate_pdep",
30038 &amd64g_calculate_pdep
, args
)) );
30040 /* Flags aren't modified. */
30041 goto decode_success
;
30043 /* PEXT r/m32, r32b, r32a = VEX.NDS.LZ.F3.0F38.W0 F5 /r */
30044 /* PEXT r/m64, r64b, r64a = VEX.NDS.LZ.F3.0F38.W1 F5 /r */
30045 if (haveF3no66noF2(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30046 Int size
= getRexW(pfx
) ? 8 : 4;
30047 IRType ty
= szToITy(size
);
30048 IRTemp src
= newTemp(ty
);
30049 IRTemp mask
= newTemp(ty
);
30050 UChar rm
= getUChar(delta
);
30052 assign( src
, getIRegV(size
,pfx
) );
30053 if (epartIsReg(rm
)) {
30054 assign( mask
, getIRegE(size
,pfx
,rm
) );
30055 DIP("pext %s,%s,%s\n", nameIRegE(size
,pfx
,rm
),
30056 nameIRegV(size
,pfx
), nameIRegG(size
,pfx
,rm
));
30059 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
30060 assign( mask
, loadLE(ty
, mkexpr(addr
)) );
30061 DIP("pext %s,%s,%s\n", dis_buf
, nameIRegV(size
,pfx
),
30062 nameIRegG(size
,pfx
,rm
));
30066 /* First mask off bits not set in mask, they are ignored
30067 and it should be fine if they contain undefined values. */
30068 IRExpr
* masked
= binop(mkSizedOp(ty
,Iop_And8
),
30069 mkexpr(src
), mkexpr(mask
));
30070 IRExpr
** args
= mkIRExprVec_2( widenUto64(masked
),
30071 widenUto64(mkexpr(mask
)) );
30072 putIRegG( size
, pfx
, rm
,
30073 narrowTo(ty
, mkIRExprCCall(Ity_I64
, 0/*regparms*/,
30074 "amd64g_calculate_pext",
30075 &amd64g_calculate_pext
, args
)) );
30077 /* Flags aren't modified. */
30078 goto decode_success
;
30083 /* MULX r/m32, r32b, r32a = VEX.NDD.LZ.F2.0F38.W0 F6 /r */
30084 /* MULX r/m64, r64b, r64a = VEX.NDD.LZ.F2.0F38.W1 F6 /r */
30085 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30086 Int size
= getRexW(pfx
) ? 8 : 4;
30087 IRType ty
= szToITy(size
);
30088 IRTemp src1
= newTemp(ty
);
30089 IRTemp src2
= newTemp(ty
);
30090 IRTemp res
= newTemp(size
== 8 ? Ity_I128
: Ity_I64
);
30091 UChar rm
= getUChar(delta
);
30093 assign( src1
, getIRegRDX(size
) );
30094 if (epartIsReg(rm
)) {
30095 assign( src2
, getIRegE(size
,pfx
,rm
) );
30096 DIP("mulx %s,%s,%s\n", nameIRegE(size
,pfx
,rm
),
30097 nameIRegV(size
,pfx
), nameIRegG(size
,pfx
,rm
));
30100 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
30101 assign( src2
, loadLE(ty
, mkexpr(addr
)) );
30102 DIP("mulx %s,%s,%s\n", dis_buf
, nameIRegV(size
,pfx
),
30103 nameIRegG(size
,pfx
,rm
));
30107 assign( res
, binop(size
== 8 ? Iop_MullU64
: Iop_MullU32
,
30108 mkexpr(src1
), mkexpr(src2
)) );
30109 putIRegV( size
, pfx
,
30110 unop(size
== 8 ? Iop_128to64
: Iop_64to32
, mkexpr(res
)) );
30111 putIRegG( size
, pfx
, rm
,
30112 unop(size
== 8 ? Iop_128HIto64
: Iop_64HIto32
,
30115 /* Flags aren't modified. */
30116 goto decode_success
;
30121 /* SARX r32b, r/m32, r32a = VEX.NDS.LZ.F3.0F38.W0 F7 /r */
30122 /* SARX r64b, r/m64, r64a = VEX.NDS.LZ.F3.0F38.W1 F7 /r */
30123 if (haveF3no66noF2(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30124 delta
= dis_SHIFTX( uses_vvvv
, vbi
, pfx
, delta
, "sarx", Iop_Sar8
);
30125 goto decode_success
;
30127 /* SHLX r32b, r/m32, r32a = VEX.NDS.LZ.66.0F38.W0 F7 /r */
30128 /* SHLX r64b, r/m64, r64a = VEX.NDS.LZ.66.0F38.W1 F7 /r */
30129 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30130 delta
= dis_SHIFTX( uses_vvvv
, vbi
, pfx
, delta
, "shlx", Iop_Shl8
);
30131 goto decode_success
;
30133 /* SHRX r32b, r/m32, r32a = VEX.NDS.LZ.F2.0F38.W0 F7 /r */
30134 /* SHRX r64b, r/m64, r64a = VEX.NDS.LZ.F2.0F38.W1 F7 /r */
30135 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30136 delta
= dis_SHIFTX( uses_vvvv
, vbi
, pfx
, delta
, "shrx", Iop_Shr8
);
30137 goto decode_success
;
30139 /* BEXTR r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F7 /r */
30140 /* BEXTR r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F7 /r */
30141 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30142 Int size
= getRexW(pfx
) ? 8 : 4;
30143 IRType ty
= szToITy(size
);
30144 IRTemp dst
= newTemp(ty
);
30145 IRTemp src1
= newTemp(ty
);
30146 IRTemp src2
= newTemp(ty
);
30147 IRTemp stle
= newTemp(Ity_I16
);
30148 IRTemp start
= newTemp(Ity_I8
);
30149 IRTemp len
= newTemp(Ity_I8
);
30150 UChar rm
= getUChar(delta
);
30152 assign( src2
, getIRegV(size
,pfx
) );
30153 if (epartIsReg(rm
)) {
30154 assign( src1
, getIRegE(size
,pfx
,rm
) );
30155 DIP("bextr %s,%s,%s\n", nameIRegV(size
,pfx
),
30156 nameIRegE(size
,pfx
,rm
), nameIRegG(size
,pfx
,rm
));
30159 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
30160 assign( src1
, loadLE(ty
, mkexpr(addr
)) );
30161 DIP("bextr %s,%s,%s\n", nameIRegV(size
,pfx
), dis_buf
,
30162 nameIRegG(size
,pfx
,rm
));
30166 assign( stle
, narrowTo( Ity_I16
, mkexpr(src2
) ) );
30167 assign( start
, unop( Iop_16to8
, mkexpr(stle
) ) );
30168 assign( len
, unop( Iop_16HIto8
, mkexpr(stle
) ) );
30169 /* if (start+len < opsize) {
30171 dst = (src1 << (opsize-start-len)) u>> (opsize-len);
30175 if (start < opsize)
30176 dst = src1 u>> start;
30182 binop(Iop_CmpLT32U
,
30184 unop(Iop_8Uto32
, mkexpr(start
)),
30185 unop(Iop_8Uto32
, mkexpr(len
))),
30188 binop(Iop_CmpEQ8
, mkexpr(len
), mkU8(0)),
30190 binop(mkSizedOp(ty
,Iop_Shr8
),
30191 binop(mkSizedOp(ty
,Iop_Shl8
), mkexpr(src1
),
30193 binop(Iop_Sub8
, mkU8(8*size
),
30196 binop(Iop_Sub8
, mkU8(8*size
),
30200 binop(Iop_CmpLT32U
,
30201 unop(Iop_8Uto32
, mkexpr(start
)),
30203 binop(mkSizedOp(ty
,Iop_Shr8
), mkexpr(src1
),
30209 putIRegG( size
, pfx
, rm
, mkexpr(dst
) );
30210 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(size
== 8
30211 ? AMD64G_CC_OP_ANDN64
30212 : AMD64G_CC_OP_ANDN32
)) );
30213 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dst
))) );
30214 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0)) );
30216 goto decode_success
;
30236 static Long
decode_vregW(Int count
, Long delta
, UChar modrm
, Prefix pfx
,
30237 const VexAbiInfo
* vbi
, IRTemp
*v
, UInt
*dst
, Int swap
)
30239 v
[0] = newTemp(Ity_V128
);
30240 v
[1] = newTemp(Ity_V128
);
30241 v
[2] = newTemp(Ity_V128
);
30242 v
[3] = newTemp(Ity_V128
);
30243 IRTemp addr
= IRTemp_INVALID
;
30247 *dst
= gregOfRexRM(pfx
, modrm
);
30248 assign( v
[0], getXMMReg(*dst
) );
30250 if ( epartIsReg( modrm
) ) {
30251 UInt ereg
= eregOfRexRM(pfx
, modrm
);
30252 assign(swap
? v
[count
-1] : v
[count
-2], getXMMReg(ereg
) );
30253 DIS(dis_buf
, "%s", nameXMMReg(ereg
));
30255 Bool extra_byte
= (getUChar(delta
- 3) & 0xF) != 9;
30256 addr
= disAMode(&alen
, vbi
, pfx
, delta
, dis_buf
, extra_byte
);
30257 assign(swap
? v
[count
-1] : v
[count
-2], loadLE(Ity_V128
, mkexpr(addr
)));
30261 UInt vvvv
= getVexNvvvv(pfx
);
30264 DIP( "%s,%s", nameXMMReg(*dst
), dis_buf
);
30267 assign( swap
? v
[1] : v
[2], getXMMReg(vvvv
) );
30268 DIP( "%s,%s,%s", nameXMMReg(*dst
), nameXMMReg(vvvv
), dis_buf
);
30272 assign( v
[1], getXMMReg(vvvv
) );
30273 UInt src2
= getUChar(delta
+ 1) >> 4;
30274 assign( swap
? v
[2] : v
[3], getXMMReg(src2
) );
30275 DIP( "%s,%s,%s,%s", nameXMMReg(*dst
), nameXMMReg(vvvv
),
30276 nameXMMReg(src2
), dis_buf
);
30283 static Long
dis_FMA4 (Prefix pfx
, Long delta
, UChar opc
,
30284 Bool
* uses_vvvv
, const VexAbiInfo
* vbi
)
30289 UChar modrm
= getUChar(delta
);
30291 Bool zero_64F
= False
;
30292 Bool zero_96F
= False
;
30293 UInt is_F32
= ((opc
& 0x01) == 0x00) ? 1 : 0;
30294 Bool neg
= (opc
& 0xF0) == 0x70;
30295 Bool alt
= (opc
& 0xF0) == 0x50;
30296 Bool sub
= alt
? (opc
& 0x0E) != 0x0E : (opc
& 0x0C) == 0x0C;
30299 switch(opc
& 0xF) {
30300 case 0x0A: zero_96F
= (opc
>> 4) != 0x05; break;
30301 case 0x0B: zero_64F
= (opc
>> 4) != 0x05; break;
30302 case 0x0E: zero_96F
= (opc
>> 4) != 0x05; break;
30303 case 0x0F: zero_64F
= (opc
>> 4) != 0x05; break;
30306 DIP("vfm%s", neg
? "n" : "");
30307 if(alt
) DIP("%s", sub
? "add" : "sub");
30308 DIP("%s", sub
? "sub" : "add");
30309 DIP("%c ", (zero_64F
|| zero_96F
) ? 's' : 'p');
30310 DIP("%c ", is_F32
? 's' : 'd');
30311 delta
= decode_vregW(4, delta
, modrm
, pfx
, vbi
, operand
, &dst
, getRexW(pfx
));
30315 void (*putXMM
[2])(UInt
,Int
,IRExpr
*) = {&putXMMRegLane64F
, &putXMMRegLane32F
};
30317 IROp size_op
[] = {Iop_V128to64
, Iop_V128HIto64
, Iop_64to32
, Iop_64HIto32
};
30318 IROp neg_op
[] = {Iop_NegF64
, Iop_NegF32
};
30320 for(i
= 0; i
< is_F32
* 2 + 2; i
++) {
30321 for(j
= 0; j
< 3; j
++) {
30323 src
[j
] = unop(Iop_ReinterpI32asF32
,
30324 unop(size_op
[i
%2+2],
30326 mkexpr(operand
[j
+ 1])
30330 src
[j
] = unop(Iop_ReinterpI64asF64
,
30332 mkexpr(operand
[j
+ 1])
30336 putXMM
[is_F32
](dst
, i
, IRExpr_Qop(is_F32
? Iop_MAddF32
: Iop_MAddF64
,
30337 get_FAKE_roundingmode(),
30338 neg
? unop(neg_op
[is_F32
], src
[0])
30341 sub
? unop(neg_op
[is_F32
], src
[2])
30349 /* Zero out top bits of ymm/xmm register. */
30350 putYMMRegLane128( dst
, 1, mkV128(0) );
30352 if(zero_64F
|| zero_96F
) {
30353 putXMMRegLane64( dst
, 1, IRExpr_Const(IRConst_U64(0)));
30357 putXMMRegLane32( dst
, 1, IRExpr_Const(IRConst_U32(0)));
/*------------------------------------------------------------*/
/*--- Top-level post-escape decoders: dis_ESC_0F3A__VEX    ---*/
/*------------------------------------------------------------*/

static IRTemp math_VPERMILPS_128 ( IRTemp sV, UInt imm8 )
{
   vassert(imm8 < 256);
   IRTemp s3, s2, s1, s0;
   s3 = s2 = s1 = s0 = IRTemp_INVALID;
   breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
   # define SEL(_nn) (((_nn)==0) ? s0 : ((_nn)==1) ? s1 \
                      : ((_nn)==2) ? s2 : s3)
   IRTemp res = newTemp(Ity_V128);
   assign(res, mkV128from32s( SEL((imm8 >> 6) & 3),
                              SEL((imm8 >> 4) & 3),
                              SEL((imm8 >> 2) & 3),
                              SEL((imm8 >> 0) & 3) ));
   # undef SEL
   return res;
}
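/* Worked example for the selector above: result lane k is source lane
   ((imm8 >> (2*k)) & 3).  So imm8 == 0x1B reverses the four 32-bit
   lanes, while imm8 == 0x00 replicates lane 0 into all four positions. */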
30386 __attribute__((noinline
))
30388 Long
dis_ESC_0F3A__VEX (
30389 /*MB_OUT*/DisResult
* dres
,
30390 /*OUT*/ Bool
* uses_vvvv
,
30391 Bool (*resteerOkFn
) ( /*opaque*/void*, Addr
),
30393 void* callback_opaque
,
30394 const VexArchInfo
* archinfo
,
30395 const VexAbiInfo
* vbi
,
30396 Prefix pfx
, Int sz
, Long deltaIN
30399 IRTemp addr
= IRTemp_INVALID
;
30402 Long delta
= deltaIN
;
30403 UChar opc
= getUChar(delta
);
30405 *uses_vvvv
= False
;
30411 /* VPERMQ imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 00 /r ib */
30412 /* VPERMPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 01 /r ib */
30413 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
30414 && 1==getRexW(pfx
)/*W1*/) {
30415 UChar modrm
= getUChar(delta
);
30417 UInt rG
= gregOfRexRM(pfx
, modrm
);
30418 IRTemp sV
= newTemp(Ity_V256
);
30419 const HChar
*name
= opc
== 0 ? "vpermq" : "vpermpd";
30420 if (epartIsReg(modrm
)) {
30421 UInt rE
= eregOfRexRM(pfx
, modrm
);
30423 imm8
= getUChar(delta
);
30424 DIP("%s $%u,%s,%s\n",
30425 name
, imm8
, nameYMMReg(rE
), nameYMMReg(rG
));
30426 assign(sV
, getYMMReg(rE
));
30428 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30430 imm8
= getUChar(delta
);
30431 DIP("%s $%u,%s,%s\n",
30432 name
, imm8
, dis_buf
, nameYMMReg(rG
));
30433 assign(sV
, loadLE(Ity_V256
, mkexpr(addr
)));
30437 s
[3] = s
[2] = s
[1] = s
[0] = IRTemp_INVALID
;
30438 breakupV256to64s(sV
, &s
[3], &s
[2], &s
[1], &s
[0]);
30439 IRTemp dV
= newTemp(Ity_V256
);
30440 assign(dV
, IRExpr_Qop(Iop_64x4toV256
,
30441 mkexpr(s
[(imm8
>> 6) & 3]),
30442 mkexpr(s
[(imm8
>> 4) & 3]),
30443 mkexpr(s
[(imm8
>> 2) & 3]),
30444 mkexpr(s
[(imm8
>> 0) & 3])));
30445 putYMMReg(rG
, mkexpr(dV
));
30446 goto decode_success
;
30451 /* VPBLENDD imm8, xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 02 /r ib */
30452 if (have66noF2noF3(pfx
)
30453 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
30454 UChar modrm
= getUChar(delta
);
30456 UInt rG
= gregOfRexRM(pfx
, modrm
);
30457 UInt rV
= getVexNvvvv(pfx
);
30458 IRTemp sV
= newTemp(Ity_V128
);
30459 IRTemp dV
= newTemp(Ity_V128
);
30462 assign(sV
, getXMMReg(rV
));
30463 if (epartIsReg(modrm
)) {
30464 UInt rE
= eregOfRexRM(pfx
, modrm
);
30466 imm8
= getUChar(delta
);
30467 DIP("vpblendd $%u,%s,%s,%s\n",
30468 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
30469 assign(dV
, getXMMReg(rE
));
30471 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30473 imm8
= getUChar(delta
);
30474 DIP("vpblendd $%u,%s,%s,%s\n",
30475 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
30476 assign(dV
, loadLE(Ity_V128
, mkexpr(addr
)));
30479 for (i
= 0; i
< 4; i
++) {
30480 s
[i
] = IRTemp_INVALID
;
30481 d
[i
] = IRTemp_INVALID
;
30483 breakupV128to32s( sV
, &s
[3], &s
[2], &s
[1], &s
[0] );
30484 breakupV128to32s( dV
, &d
[3], &d
[2], &d
[1], &d
[0] );
30485 for (i
= 0; i
< 4; i
++)
30486 putYMMRegLane32(rG
, i
, mkexpr((imm8
& (1<<i
)) ? d
[i
] : s
[i
]));
30487 putYMMRegLane128(rG
, 1, mkV128(0));
30489 goto decode_success
;
30491 /* VPBLENDD imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 02 /r ib */
30492 if (have66noF2noF3(pfx
)
30493 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
30494 UChar modrm
= getUChar(delta
);
30496 UInt rG
= gregOfRexRM(pfx
, modrm
);
30497 UInt rV
= getVexNvvvv(pfx
);
30498 IRTemp sV
= newTemp(Ity_V256
);
30499 IRTemp dV
= newTemp(Ity_V256
);
30502 assign(sV
, getYMMReg(rV
));
30503 if (epartIsReg(modrm
)) {
30504 UInt rE
= eregOfRexRM(pfx
, modrm
);
30506 imm8
= getUChar(delta
);
30507 DIP("vpblendd $%u,%s,%s,%s\n",
30508 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
30509 assign(dV
, getYMMReg(rE
));
30511 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30513 imm8
= getUChar(delta
);
30514 DIP("vpblendd $%u,%s,%s,%s\n",
30515 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
30516 assign(dV
, loadLE(Ity_V256
, mkexpr(addr
)));
30519 for (i
= 0; i
< 8; i
++) {
30520 s
[i
] = IRTemp_INVALID
;
30521 d
[i
] = IRTemp_INVALID
;
30523 breakupV256to32s( sV
, &s
[7], &s
[6], &s
[5], &s
[4],
30524 &s
[3], &s
[2], &s
[1], &s
[0] );
30525 breakupV256to32s( dV
, &d
[7], &d
[6], &d
[5], &d
[4],
30526 &d
[3], &d
[2], &d
[1], &d
[0] );
30527 for (i
= 0; i
< 8; i
++)
30528 putYMMRegLane32(rG
, i
, mkexpr((imm8
& (1<<i
)) ? d
[i
] : s
[i
]));
30530 goto decode_success
;
30535 /* VPERMILPS imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 04 /r ib */
30536 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
30537 UChar modrm
= getUChar(delta
);
30539 UInt rG
= gregOfRexRM(pfx
, modrm
);
30540 IRTemp sV
= newTemp(Ity_V256
);
30541 if (epartIsReg(modrm
)) {
30542 UInt rE
= eregOfRexRM(pfx
, modrm
);
30544 imm8
= getUChar(delta
);
30545 DIP("vpermilps $%u,%s,%s\n",
30546 imm8
, nameYMMReg(rE
), nameYMMReg(rG
));
30547 assign(sV
, getYMMReg(rE
));
30549 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30551 imm8
= getUChar(delta
);
30552 DIP("vpermilps $%u,%s,%s\n",
30553 imm8
, dis_buf
, nameYMMReg(rG
));
30554 assign(sV
, loadLE(Ity_V256
, mkexpr(addr
)));
30557 IRTemp sVhi
= IRTemp_INVALID
, sVlo
= IRTemp_INVALID
;
30558 breakupV256toV128s( sV
, &sVhi
, &sVlo
);
30559 IRTemp dVhi
= math_VPERMILPS_128( sVhi
, imm8
);
30560 IRTemp dVlo
= math_VPERMILPS_128( sVlo
, imm8
);
30561 IRExpr
* res
= binop(Iop_V128HLtoV256
, mkexpr(dVhi
), mkexpr(dVlo
));
30562 putYMMReg(rG
, res
);
30563 goto decode_success
;
30565 /* VPERMILPS imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 04 /r ib */
30566 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
30567 UChar modrm
= getUChar(delta
);
30569 UInt rG
= gregOfRexRM(pfx
, modrm
);
30570 IRTemp sV
= newTemp(Ity_V128
);
30571 if (epartIsReg(modrm
)) {
30572 UInt rE
= eregOfRexRM(pfx
, modrm
);
30574 imm8
= getUChar(delta
);
30575 DIP("vpermilps $%u,%s,%s\n",
30576 imm8
, nameXMMReg(rE
), nameXMMReg(rG
));
30577 assign(sV
, getXMMReg(rE
));
30579 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30581 imm8
= getUChar(delta
);
30582 DIP("vpermilps $%u,%s,%s\n",
30583 imm8
, dis_buf
, nameXMMReg(rG
));
30584 assign(sV
, loadLE(Ity_V128
, mkexpr(addr
)));
30587 putYMMRegLoAndZU(rG
, mkexpr ( math_VPERMILPS_128 ( sV
, imm8
) ) );
30588 goto decode_success
;
30593 /* VPERMILPD imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 05 /r ib */
30594 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
30595 UChar modrm
= getUChar(delta
);
30597 UInt rG
= gregOfRexRM(pfx
, modrm
);
30598 IRTemp sV
= newTemp(Ity_V128
);
30599 if (epartIsReg(modrm
)) {
30600 UInt rE
= eregOfRexRM(pfx
, modrm
);
30602 imm8
= getUChar(delta
);
30603 DIP("vpermilpd $%u,%s,%s\n",
30604 imm8
, nameXMMReg(rE
), nameXMMReg(rG
));
30605 assign(sV
, getXMMReg(rE
));
30607 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30609 imm8
= getUChar(delta
);
30610 DIP("vpermilpd $%u,%s,%s\n",
30611 imm8
, dis_buf
, nameXMMReg(rG
));
30612 assign(sV
, loadLE(Ity_V128
, mkexpr(addr
)));
30615 IRTemp s1
= newTemp(Ity_I64
);
30616 IRTemp s0
= newTemp(Ity_I64
);
30617 assign(s1
, unop(Iop_V128HIto64
, mkexpr(sV
)));
30618 assign(s0
, unop(Iop_V128to64
, mkexpr(sV
)));
30619 IRTemp dV
= newTemp(Ity_V128
);
30620 assign(dV
, binop(Iop_64HLtoV128
,
30621 mkexpr((imm8
& (1<<1)) ? s1
: s0
),
30622 mkexpr((imm8
& (1<<0)) ? s1
: s0
)));
30623 putYMMRegLoAndZU(rG
, mkexpr(dV
));
30624 goto decode_success
;
30626 /* VPERMILPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 05 /r ib */
30627 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
30628 UChar modrm
= getUChar(delta
);
30630 UInt rG
= gregOfRexRM(pfx
, modrm
);
30631 IRTemp sV
= newTemp(Ity_V256
);
30632 if (epartIsReg(modrm
)) {
30633 UInt rE
= eregOfRexRM(pfx
, modrm
);
30635 imm8
= getUChar(delta
);
30636 DIP("vpermilpd $%u,%s,%s\n",
30637 imm8
, nameYMMReg(rE
), nameYMMReg(rG
));
30638 assign(sV
, getYMMReg(rE
));
30640 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30642 imm8
= getUChar(delta
);
30643 DIP("vpermilpd $%u,%s,%s\n",
30644 imm8
, dis_buf
, nameYMMReg(rG
));
30645 assign(sV
, loadLE(Ity_V256
, mkexpr(addr
)));
30648 IRTemp s3
, s2
, s1
, s0
;
30649 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
30650 breakupV256to64s(sV
, &s3
, &s2
, &s1
, &s0
);
30651 IRTemp dV
= newTemp(Ity_V256
);
30652 assign(dV
, IRExpr_Qop(Iop_64x4toV256
,
30653 mkexpr((imm8
& (1<<3)) ? s3
: s2
),
30654 mkexpr((imm8
& (1<<2)) ? s3
: s2
),
30655 mkexpr((imm8
& (1<<1)) ? s1
: s0
),
30656 mkexpr((imm8
& (1<<0)) ? s1
: s0
)));
30657 putYMMReg(rG
, mkexpr(dV
));
30658 goto decode_success
;
30663 /* VPERM2F128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 06 /r ib */
30664 if (have66noF2noF3(pfx
)
30665 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
30666 UChar modrm
= getUChar(delta
);
30668 UInt rG
= gregOfRexRM(pfx
, modrm
);
30669 UInt rV
= getVexNvvvv(pfx
);
30670 IRTemp s00
= newTemp(Ity_V128
);
30671 IRTemp s01
= newTemp(Ity_V128
);
30672 IRTemp s10
= newTemp(Ity_V128
);
30673 IRTemp s11
= newTemp(Ity_V128
);
30674 assign(s00
, getYMMRegLane128(rV
, 0));
30675 assign(s01
, getYMMRegLane128(rV
, 1));
30676 if (epartIsReg(modrm
)) {
30677 UInt rE
= eregOfRexRM(pfx
, modrm
);
30679 imm8
= getUChar(delta
);
30680 DIP("vperm2f128 $%u,%s,%s,%s\n",
30681 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
30682 assign(s10
, getYMMRegLane128(rE
, 0));
30683 assign(s11
, getYMMRegLane128(rE
, 1));
30685 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30687 imm8
= getUChar(delta
);
30688 DIP("vperm2f128 $%u,%s,%s,%s\n",
30689 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
30690 assign(s10
, loadLE(Ity_V128
, binop(Iop_Add64
,
30691 mkexpr(addr
), mkU64(0))));
30692 assign(s11
, loadLE(Ity_V128
, binop(Iop_Add64
,
30693 mkexpr(addr
), mkU64(16))));
30696 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
30697 : ((_nn)==2) ? s10 : s11)
30698 putYMMRegLane128(rG
, 0, mkexpr(SEL((imm8
>> 0) & 3)));
30699 putYMMRegLane128(rG
, 1, mkexpr(SEL((imm8
>> 4) & 3)));
30701 if (imm8
& (1<<3)) putYMMRegLane128(rG
, 0, mkV128(0));
30702 if (imm8
& (1<<7)) putYMMRegLane128(rG
, 1, mkV128(0));
30704 goto decode_success
;
30709 /* VROUNDPS imm8, xmm2/m128, xmm1 */
30710 /* VROUNDPS = VEX.NDS.128.66.0F3A.WIG 08 ib */
30711 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
30712 UChar modrm
= getUChar(delta
);
30713 UInt rG
= gregOfRexRM(pfx
, modrm
);
30714 IRTemp src
= newTemp(Ity_V128
);
30715 IRTemp s0
= IRTemp_INVALID
;
30716 IRTemp s1
= IRTemp_INVALID
;
30717 IRTemp s2
= IRTemp_INVALID
;
30718 IRTemp s3
= IRTemp_INVALID
;
30719 IRTemp rm
= newTemp(Ity_I32
);
30722 modrm
= getUChar(delta
);
30724 if (epartIsReg(modrm
)) {
30725 UInt rE
= eregOfRexRM(pfx
, modrm
);
30726 assign( src
, getXMMReg( rE
) );
30727 imm
= getUChar(delta
+1);
30728 if (imm
& ~15) break;
30730 DIP( "vroundps $%d,%s,%s\n", imm
, nameXMMReg(rE
), nameXMMReg(rG
) );
30732 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30733 assign( src
, loadLE(Ity_V128
, mkexpr(addr
) ) );
30734 imm
= getUChar(delta
+alen
);
30735 if (imm
& ~15) break;
30737 DIP( "vroundps $%d,%s,%s\n", imm
, dis_buf
, nameXMMReg(rG
) );
30740 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30741 that encoding is the same as the encoding for IRRoundingMode,
30742 we can use that value directly in the IR as a rounding
30744 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
30746 breakupV128to32s( src
, &s3
, &s2
, &s1
, &s0
);
30747 putYMMRegLane128( rG
, 1, mkV128(0) );
30748 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
30749 unop(Iop_ReinterpI32asF32, mkexpr(s)))
30750 putYMMRegLane32F( rG
, 3, CVT(s3
) );
30751 putYMMRegLane32F( rG
, 2, CVT(s2
) );
30752 putYMMRegLane32F( rG
, 1, CVT(s1
) );
30753 putYMMRegLane32F( rG
, 0, CVT(s0
) );
30755 goto decode_success
;
30757 /* VROUNDPS imm8, ymm2/m256, ymm1 */
30758 /* VROUNDPS = VEX.NDS.256.66.0F3A.WIG 08 ib */
30759 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
30760 UChar modrm
= getUChar(delta
);
30761 UInt rG
= gregOfRexRM(pfx
, modrm
);
30762 IRTemp src
= newTemp(Ity_V256
);
30763 IRTemp s0
= IRTemp_INVALID
;
30764 IRTemp s1
= IRTemp_INVALID
;
30765 IRTemp s2
= IRTemp_INVALID
;
30766 IRTemp s3
= IRTemp_INVALID
;
30767 IRTemp s4
= IRTemp_INVALID
;
30768 IRTemp s5
= IRTemp_INVALID
;
30769 IRTemp s6
= IRTemp_INVALID
;
30770 IRTemp s7
= IRTemp_INVALID
;
30771 IRTemp rm
= newTemp(Ity_I32
);
30774 modrm
= getUChar(delta
);
30776 if (epartIsReg(modrm
)) {
30777 UInt rE
= eregOfRexRM(pfx
, modrm
);
30778 assign( src
, getYMMReg( rE
) );
30779 imm
= getUChar(delta
+1);
30780 if (imm
& ~15) break;
30782 DIP( "vroundps $%d,%s,%s\n", imm
, nameYMMReg(rE
), nameYMMReg(rG
) );
30784 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30785 assign( src
, loadLE(Ity_V256
, mkexpr(addr
) ) );
30786 imm
= getUChar(delta
+alen
);
30787 if (imm
& ~15) break;
30789 DIP( "vroundps $%d,%s,%s\n", imm
, dis_buf
, nameYMMReg(rG
) );
30792 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30793 that encoding is the same as the encoding for IRRoundingMode,
30794 we can use that value directly in the IR as a rounding
30796 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
30798 breakupV256to32s( src
, &s7
, &s6
, &s5
, &s4
, &s3
, &s2
, &s1
, &s0
);
30799 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
30800 unop(Iop_ReinterpI32asF32, mkexpr(s)))
30801 putYMMRegLane32F( rG
, 7, CVT(s7
) );
30802 putYMMRegLane32F( rG
, 6, CVT(s6
) );
30803 putYMMRegLane32F( rG
, 5, CVT(s5
) );
30804 putYMMRegLane32F( rG
, 4, CVT(s4
) );
30805 putYMMRegLane32F( rG
, 3, CVT(s3
) );
30806 putYMMRegLane32F( rG
, 2, CVT(s2
) );
30807 putYMMRegLane32F( rG
, 1, CVT(s1
) );
30808 putYMMRegLane32F( rG
, 0, CVT(s0
) );
30810 goto decode_success
;
30814 /* VROUNDPD imm8, xmm2/m128, xmm1 */
30815 /* VROUNDPD = VEX.NDS.128.66.0F3A.WIG 09 ib */
30816 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
30817 UChar modrm
= getUChar(delta
);
30818 UInt rG
= gregOfRexRM(pfx
, modrm
);
30819 IRTemp src
= newTemp(Ity_V128
);
30820 IRTemp s0
= IRTemp_INVALID
;
30821 IRTemp s1
= IRTemp_INVALID
;
30822 IRTemp rm
= newTemp(Ity_I32
);
30825 modrm
= getUChar(delta
);
30827 if (epartIsReg(modrm
)) {
30828 UInt rE
= eregOfRexRM(pfx
, modrm
);
30829 assign( src
, getXMMReg( rE
) );
30830 imm
= getUChar(delta
+1);
30831 if (imm
& ~15) break;
30833 DIP( "vroundpd $%d,%s,%s\n", imm
, nameXMMReg(rE
), nameXMMReg(rG
) );
30835 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30836 assign( src
, loadLE(Ity_V128
, mkexpr(addr
) ) );
30837 imm
= getUChar(delta
+alen
);
30838 if (imm
& ~15) break;
30840 DIP( "vroundpd $%d,%s,%s\n", imm
, dis_buf
, nameXMMReg(rG
) );
30843 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30844 that encoding is the same as the encoding for IRRoundingMode,
30845 we can use that value directly in the IR as a rounding
30847 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
30849 breakupV128to64s( src
, &s1
, &s0
);
30850 putYMMRegLane128( rG
, 1, mkV128(0) );
30851 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
30852 unop(Iop_ReinterpI64asF64, mkexpr(s)))
30853 putYMMRegLane64F( rG
, 1, CVT(s1
) );
30854 putYMMRegLane64F( rG
, 0, CVT(s0
) );
30856 goto decode_success
;
30858 /* VROUNDPD imm8, ymm2/m256, ymm1 */
30859 /* VROUNDPD = VEX.NDS.256.66.0F3A.WIG 09 ib */
30860 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
30861 UChar modrm
= getUChar(delta
);
30862 UInt rG
= gregOfRexRM(pfx
, modrm
);
30863 IRTemp src
= newTemp(Ity_V256
);
30864 IRTemp s0
= IRTemp_INVALID
;
30865 IRTemp s1
= IRTemp_INVALID
;
30866 IRTemp s2
= IRTemp_INVALID
;
30867 IRTemp s3
= IRTemp_INVALID
;
30868 IRTemp rm
= newTemp(Ity_I32
);
30871 modrm
= getUChar(delta
);
30873 if (epartIsReg(modrm
)) {
30874 UInt rE
= eregOfRexRM(pfx
, modrm
);
30875 assign( src
, getYMMReg( rE
) );
30876 imm
= getUChar(delta
+1);
30877 if (imm
& ~15) break;
30879 DIP( "vroundpd $%d,%s,%s\n", imm
, nameYMMReg(rE
), nameYMMReg(rG
) );
30881 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30882 assign( src
, loadLE(Ity_V256
, mkexpr(addr
) ) );
30883 imm
= getUChar(delta
+alen
);
30884 if (imm
& ~15) break;
30886 DIP( "vroundps $%d,%s,%s\n", imm
, dis_buf
, nameYMMReg(rG
) );
30889 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30890 that encoding is the same as the encoding for IRRoundingMode,
30891 we can use that value directly in the IR as a rounding
30893 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
30895 breakupV256to64s( src
, &s3
, &s2
, &s1
, &s0
);
30896 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
30897 unop(Iop_ReinterpI64asF64, mkexpr(s)))
30898 putYMMRegLane64F( rG
, 3, CVT(s3
) );
30899 putYMMRegLane64F( rG
, 2, CVT(s2
) );
30900 putYMMRegLane64F( rG
, 1, CVT(s1
) );
30901 putYMMRegLane64F( rG
, 0, CVT(s0
) );
30903 goto decode_success
;
30908 /* VROUNDSS imm8, xmm3/m32, xmm2, xmm1 */
30909 /* VROUNDSS = VEX.NDS.128.66.0F3A.WIG 0A ib */
30910 /* VROUNDSD imm8, xmm3/m64, xmm2, xmm1 */
30911 /* VROUNDSD = VEX.NDS.128.66.0F3A.WIG 0B ib */
30912 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
30913 UChar modrm
= getUChar(delta
);
30914 UInt rG
= gregOfRexRM(pfx
, modrm
);
30915 UInt rV
= getVexNvvvv(pfx
);
30916 Bool isD
= opc
== 0x0B;
30917 IRTemp src
= newTemp(isD
? Ity_F64
: Ity_F32
);
30918 IRTemp res
= newTemp(isD
? Ity_F64
: Ity_F32
);
30921 if (epartIsReg(modrm
)) {
30922 UInt rE
= eregOfRexRM(pfx
, modrm
);
30924 isD
? getXMMRegLane64F(rE
, 0) : getXMMRegLane32F(rE
, 0) );
30925 imm
= getUChar(delta
+1);
30926 if (imm
& ~15) break;
30928 DIP( "vrounds%c $%d,%s,%s,%s\n",
30930 imm
, nameXMMReg( rE
), nameXMMReg( rV
), nameXMMReg( rG
) );
30932 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30933 assign( src
, loadLE( isD
? Ity_F64
: Ity_F32
, mkexpr(addr
) ));
30934 imm
= getUChar(delta
+alen
);
30935 if (imm
& ~15) break;
30937 DIP( "vrounds%c $%d,%s,%s,%s\n",
30939 imm
, dis_buf
, nameXMMReg( rV
), nameXMMReg( rG
) );
30942 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30943 that encoding is the same as the encoding for IRRoundingMode,
30944 we can use that value directly in the IR as a rounding
30946 assign(res
, binop(isD
? Iop_RoundF64toInt
: Iop_RoundF32toInt
,
30947 (imm
& 4) ? get_sse_roundingmode()
30952 putXMMRegLane64F( rG
, 0, mkexpr(res
) );
30954 putXMMRegLane32F( rG
, 0, mkexpr(res
) );
30955 putXMMRegLane32F( rG
, 1, getXMMRegLane32F( rV
, 1 ) );
30957 putXMMRegLane64F( rG
, 1, getXMMRegLane64F( rV
, 1 ) );
30958 putYMMRegLane128( rG
, 1, mkV128(0) );
30960 goto decode_success
;
30965 /* VBLENDPS imm8, ymm3/m256, ymm2, ymm1 */
30966 /* VBLENDPS = VEX.NDS.256.66.0F3A.WIG 0C /r ib */
30967 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
30968 UChar modrm
= getUChar(delta
);
30970 UInt rG
= gregOfRexRM(pfx
, modrm
);
30971 UInt rV
= getVexNvvvv(pfx
);
30972 IRTemp sV
= newTemp(Ity_V256
);
30973 IRTemp sE
= newTemp(Ity_V256
);
30974 assign ( sV
, getYMMReg(rV
) );
30975 if (epartIsReg(modrm
)) {
30976 UInt rE
= eregOfRexRM(pfx
, modrm
);
30978 imm8
= getUChar(delta
);
30979 DIP("vblendps $%u,%s,%s,%s\n",
30980 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
30981 assign(sE
, getYMMReg(rE
));
30983 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30985 imm8
= getUChar(delta
);
30986 DIP("vblendps $%u,%s,%s,%s\n",
30987 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
30988 assign(sE
, loadLE(Ity_V256
, mkexpr(addr
)));
30992 mkexpr( math_BLENDPS_256( sE
, sV
, imm8
) ) );
30994 goto decode_success
;
      /* VBLENDPS imm8, xmm3/m128, xmm2, xmm1 */
      /* VBLENDPS = VEX.NDS.128.66.0F3A.WIG 0C /r ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp sV    = newTemp(Ity_V128);
         IRTemp sE    = newTemp(Ity_V128);
         assign ( sV, getXMMReg(rV) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            imm8 = getUChar(delta);
            DIP("vblendps $%u,%s,%s,%s\n",
                imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
            assign(sE, getXMMReg(rE));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            imm8 = getUChar(delta);
            DIP("vblendps $%u,%s,%s,%s\n",
                imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
            assign(sE, loadLE(Ity_V128, mkexpr(addr)));
         }
         putYMMRegLoAndZU( rG,
                           mkexpr( math_BLENDPS_128( sE, sV, imm8 ) ) );
         goto decode_success;
      }
      break;
      /* VBLENDPD imm8, ymm3/m256, ymm2, ymm1 */
      /* VBLENDPD = VEX.NDS.256.66.0F3A.WIG 0D /r ib */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp sV    = newTemp(Ity_V256);
         IRTemp sE    = newTemp(Ity_V256);
         assign ( sV, getYMMReg(rV) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            imm8 = getUChar(delta);
            DIP("vblendpd $%u,%s,%s,%s\n",
                imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
            assign(sE, getYMMReg(rE));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            imm8 = getUChar(delta);
            DIP("vblendpd $%u,%s,%s,%s\n",
                imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
            assign(sE, loadLE(Ity_V256, mkexpr(addr)));
         }
         putYMMReg( rG,
                    mkexpr( math_BLENDPD_256( sE, sV, imm8 ) ) );
         goto decode_success;
      }
      break;
      /* VBLENDPD imm8, xmm3/m128, xmm2, xmm1 */
      /* VBLENDPD = VEX.NDS.128.66.0F3A.WIG 0D /r ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp sV    = newTemp(Ity_V128);
         IRTemp sE    = newTemp(Ity_V128);
         assign ( sV, getXMMReg(rV) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            imm8 = getUChar(delta);
            DIP("vblendpd $%u,%s,%s,%s\n",
                imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
            assign(sE, getXMMReg(rE));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            imm8 = getUChar(delta);
            DIP("vblendpd $%u,%s,%s,%s\n",
                imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
            assign(sE, loadLE(Ity_V128, mkexpr(addr)));
         }
         putYMMRegLoAndZU( rG,
                           mkexpr( math_BLENDPD_128( sE, sV, imm8 ) ) );
         goto decode_success;
      }
      break;
      /* VPBLENDW imm8, xmm3/m128, xmm2, xmm1 */
      /* VPBLENDW = VEX.NDS.128.66.0F3A.WIG 0E /r ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp sV    = newTemp(Ity_V128);
         IRTemp sE    = newTemp(Ity_V128);
         assign ( sV, getXMMReg(rV) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            imm8 = getUChar(delta);
            DIP("vpblendw $%u,%s,%s,%s\n",
                imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
            assign(sE, getXMMReg(rE));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            imm8 = getUChar(delta);
            DIP("vpblendw $%u,%s,%s,%s\n",
                imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
            assign(sE, loadLE(Ity_V128, mkexpr(addr)));
         }
         putYMMRegLoAndZU( rG,
                           mkexpr( math_PBLENDW_128( sE, sV, imm8 ) ) );
         goto decode_success;
      }
      break;
      /* VPBLENDW imm8, ymm3/m256, ymm2, ymm1 */
      /* VPBLENDW = VEX.NDS.256.66.0F3A.WIG 0E /r ib */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp sV    = newTemp(Ity_V256);
         IRTemp sE    = newTemp(Ity_V256);
         IRTemp sVhi, sVlo, sEhi, sElo;
         sVhi = sVlo = sEhi = sElo = IRTemp_INVALID;
         assign ( sV, getYMMReg(rV) );
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            imm8 = getUChar(delta);
            DIP("vpblendw $%u,%s,%s,%s\n",
                imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
            assign(sE, getYMMReg(rE));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            imm8 = getUChar(delta);
            DIP("vpblendw $%u,%s,%s,%s\n",
                imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
            assign(sE, loadLE(Ity_V256, mkexpr(addr)));
         }
         breakupV256toV128s( sV, &sVhi, &sVlo );
         breakupV256toV128s( sE, &sEhi, &sElo );
         putYMMReg( rG, binop( Iop_V128HLtoV256,
                               mkexpr( math_PBLENDW_128( sEhi, sVhi, imm8 ) ),
                               mkexpr( math_PBLENDW_128( sElo, sVlo, imm8 ) ) ) );
         goto decode_success;
      }
      break;
      /* VPALIGNR imm8, xmm3/m128, xmm2, xmm1 */
      /* VPALIGNR = VEX.NDS.128.66.0F3A.WIG 0F /r ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp sV    = newTemp(Ity_V128);
         IRTemp dV    = newTemp(Ity_V128);

         assign( dV, getXMMReg(rV) );

         if ( epartIsReg( modrm ) ) {
            UInt rE = eregOfRexRM(pfx, modrm);
            assign( sV, getXMMReg(rE) );
            imm8 = getUChar(delta+1);
            DIP("vpalignr $%u,%s,%s,%s\n", imm8, nameXMMReg(rE),
                                           nameXMMReg(rV), nameXMMReg(rG));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
            imm8 = getUChar(delta+alen);
            DIP("vpalignr $%u,%s,%s,%s\n", imm8, dis_buf,
                                           nameXMMReg(rV), nameXMMReg(rG));
         }

         IRTemp res = math_PALIGNR_XMM( sV, dV, imm8 );
         putYMMRegLoAndZU( rG, mkexpr(res) );
         goto decode_success;
      }
      break;
      /* VPALIGNR imm8, ymm3/m256, ymm2, ymm1 */
      /* VPALIGNR = VEX.NDS.256.66.0F3A.WIG 0F /r ib */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp sV    = newTemp(Ity_V256);
         IRTemp dV    = newTemp(Ity_V256);
         IRTemp sHi, sLo, dHi, dLo;
         sHi = sLo = dHi = dLo = IRTemp_INVALID;

         assign( dV, getYMMReg(rV) );

         if ( epartIsReg( modrm ) ) {
            UInt rE = eregOfRexRM(pfx, modrm);
            assign( sV, getYMMReg(rE) );
            imm8 = getUChar(delta+1);
            DIP("vpalignr $%u,%s,%s,%s\n", imm8, nameYMMReg(rE),
                                           nameYMMReg(rV), nameYMMReg(rG));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
            imm8 = getUChar(delta+alen);
            DIP("vpalignr $%u,%s,%s,%s\n", imm8, dis_buf,
                                           nameYMMReg(rV), nameYMMReg(rG));
         }

         breakupV256toV128s( dV, &dHi, &dLo );
         breakupV256toV128s( sV, &sHi, &sLo );
         putYMMReg( rG, binop( Iop_V128HLtoV256,
                               mkexpr( math_PALIGNR_XMM( sHi, dHi, imm8 ) ),
                               mkexpr( math_PALIGNR_XMM( sLo, dLo, imm8 ) ) ) );
         goto decode_success;
      }
      break;
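      /* Note that AVX2 VPALIGNR does not concatenate all 32 bytes; it
         operates on each 128-bit lane independently, which is why the
         256-bit form above is built from two math_PALIGNR_XMM calls, one
         per lane, both using the same imm8 byte-shift amount. */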
      /* VPEXTRB imm8, xmm2, reg/m8 = VEX.128.66.0F3A.W0 14 /r ib */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
         delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
         goto decode_success;
      }
      break;
      /* VPEXTRW imm8, reg/m16, xmm2 */
      /* VPEXTRW = VEX.128.66.0F3A.W0 15 /r ib */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
         delta = dis_PEXTRW( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      break;
      /* VPEXTRD imm8, r32/m32, xmm2 */
      /* VPEXTRD = VEX.128.66.0F3A.W0 16 /r ib */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
         delta = dis_PEXTRD( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      break;
      /* VPEXTRQ = VEX.128.66.0F3A.W1 16 /r ib */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
         delta = dis_PEXTRQ( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      break;
      /* VEXTRACTPS imm8, xmm1, r32/m32 = VEX.128.66.0F3A.WIG 17 /r ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_EXTRACTPS( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      break;
      /* VINSERTF128 r/m, rV, rD
         ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
      /* VINSERTF128 = VEX.NDS.256.66.0F3A.W0 18 /r ib */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp t128  = newTemp(Ity_V128);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            assign(t128, getXMMReg(rE));
            ib = getUChar(delta);
            DIP("vinsertf128 $%u,%s,%s,%s\n",
                ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            assign(t128, loadLE(Ity_V128, mkexpr(addr)));
            ib = getUChar(delta);
            DIP("vinsertf128 $%u,%s,%s,%s\n",
                ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
         }
         putYMMRegLane128(rG, 0,      getYMMRegLane128(rV, 0));
         putYMMRegLane128(rG, 1,      getYMMRegLane128(rV, 1));
         putYMMRegLane128(rG, ib & 1, mkexpr(t128));
         goto decode_success;
      }
      break;
      /* VEXTRACTF128 $lane_no, rS, r/m
         ::: r/m:V128 = a lane of rS:V256 (RM format) */
      /* VEXTRACTF128 = VEX.256.66.0F3A.W0 19 /r ib */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
         UChar  modrm = getUChar(delta);
         UInt   rS    = gregOfRexRM(pfx, modrm);
         IRTemp t128  = newTemp(Ity_V128);
         if (epartIsReg(modrm)) {
            UInt rD = eregOfRexRM(pfx, modrm);
            ib = getUChar(delta);
            assign(t128, getYMMRegLane128(rS, ib & 1));
            putYMMRegLoAndZU(rD, mkexpr(t128));
            DIP("vextractf128 $%u,%s,%s\n",
                ib, nameXMMReg(rS), nameYMMReg(rD));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            ib = getUChar(delta);
            assign(t128, getYMMRegLane128(rS, ib & 1));
            storeLE(mkexpr(addr), mkexpr(t128));
            DIP("vextractf128 $%u,%s,%s\n",
                ib, nameYMMReg(rS), dis_buf);
         }
         /* doesn't use vvvv */
         goto decode_success;
      }
      break;
      /* VPINSRB r32/m8, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 20 /r ib */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
         UChar  modrm  = getUChar(delta);
         UInt   rG     = gregOfRexRM(pfx, modrm);
         UInt   rV     = getVexNvvvv(pfx);
         IRTemp src_u8 = newTemp(Ity_I8);

         if ( epartIsReg( modrm ) ) {
            UInt rE = eregOfRexRM(pfx,modrm);
            imm8 = (Int)(getUChar(delta+1) & 15);
            assign( src_u8, unop(Iop_32to8, getIReg32( rE )) );
            DIP( "vpinsrb $%d,%s,%s,%s\n",
                 imm8, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            imm8 = (Int)(getUChar(delta+alen) & 15);
            assign( src_u8, loadLE( Ity_I8, mkexpr(addr) ) );
            DIP( "vpinsrb $%d,%s,%s,%s\n",
                 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
         }

         IRTemp src_vec = newTemp(Ity_V128);
         assign(src_vec, getXMMReg( rV ));
         IRTemp res_vec = math_PINSRB_128( src_vec, src_u8, imm8 );
         putYMMRegLoAndZU( rG, mkexpr(res_vec) );
         goto decode_success;
      }
      break;
      /* VINSERTPS imm8, xmm3/m32, xmm2, xmm1
         = VEX.NDS.128.66.0F3A.WIG 21 /r ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */
         const IRTemp inval = IRTemp_INVALID;

         if ( epartIsReg( modrm ) ) {
            UInt   rE = eregOfRexRM(pfx, modrm);
            IRTemp vE = newTemp(Ity_V128);
            assign( vE, getXMMReg(rE) );
            IRTemp dsE[4] = { inval, inval, inval, inval };
            breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] );
            imm8 = getUChar(delta+1);
            d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */
            DIP( "insertps $%u, %s,%s\n",
                 imm8, nameXMMReg(rE), nameXMMReg(rG) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) );
            imm8 = getUChar(delta+alen);
            DIP( "insertps $%u, %s,%s\n",
                 imm8, dis_buf, nameXMMReg(rG) );
         }

         IRTemp vV = newTemp(Ity_V128);
         assign( vV, getXMMReg(rV) );

         putYMMRegLoAndZU( rG, mkexpr(math_INSERTPS( vV, d2ins, imm8 )) );
         goto decode_success;
      }
      break;
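      /* Rough guide to the INSERTPS imm8 fields used above: bits 7:6
         ("count_s") select which dword of a register source becomes d2ins
         (a memory source always supplies its one dword), bits 5:4 pick the
         destination dword, and bits 3:0 are a zero mask - details that are
         assumed to be handled inside math_INSERTPS. */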
      /* VPINSRD r32/m32, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 22 /r ib */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
         UChar  modrm   = getUChar(delta);
         UInt   rG      = gregOfRexRM(pfx, modrm);
         UInt   rV      = getVexNvvvv(pfx);
         IRTemp src_u32 = newTemp(Ity_I32);

         if ( epartIsReg( modrm ) ) {
            UInt rE = eregOfRexRM(pfx,modrm);
            imm8_10 = (Int)(getUChar(delta+1) & 3);
            assign( src_u32, getIReg32( rE ) );
            DIP( "vpinsrd $%d,%s,%s,%s\n",
                 imm8_10, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            imm8_10 = (Int)(getUChar(delta+alen) & 3);
            assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) );
            DIP( "vpinsrd $%d,%s,%s,%s\n",
                 imm8_10, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
         }

         IRTemp src_vec = newTemp(Ity_V128);
         assign(src_vec, getXMMReg( rV ));
         IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 );
         putYMMRegLoAndZU( rG, mkexpr(res_vec) );
         goto decode_success;
      }
      break;
      /* VPINSRQ r64/m64, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W1 22 /r ib */
      if (have66noF2noF3(pfx)
          && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
         UChar  modrm   = getUChar(delta);
         UInt   rG      = gregOfRexRM(pfx, modrm);
         UInt   rV      = getVexNvvvv(pfx);
         IRTemp src_u64 = newTemp(Ity_I64);

         if ( epartIsReg( modrm ) ) {
            UInt rE = eregOfRexRM(pfx,modrm);
            imm8_0 = (Int)(getUChar(delta+1) & 1);
            assign( src_u64, getIReg64( rE ) );
            DIP( "vpinsrq $%d,%s,%s,%s\n",
                 imm8_0, nameIReg64(rE), nameXMMReg(rV), nameXMMReg(rG) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            imm8_0 = (Int)(getUChar(delta+alen) & 1);
            assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) );
            DIP( "vpinsrq $%d,%s,%s,%s\n",
                 imm8_0, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
         }

         IRTemp src_vec = newTemp(Ity_V128);
         assign(src_vec, getXMMReg( rV ));
         IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 );
         putYMMRegLoAndZU( rG, mkexpr(res_vec) );
         goto decode_success;
      }
      break;
      /* VINSERTI128 r/m, rV, rD
         ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
      /* VINSERTI128 = VEX.NDS.256.66.0F3A.W0 38 /r ib */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp t128  = newTemp(Ity_V128);
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            assign(t128, getXMMReg(rE));
            ib = getUChar(delta);
            DIP("vinserti128 $%u,%s,%s,%s\n",
                ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            assign(t128, loadLE(Ity_V128, mkexpr(addr)));
            ib = getUChar(delta);
            DIP("vinserti128 $%u,%s,%s,%s\n",
                ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
         }
         putYMMRegLane128(rG, 0,      getYMMRegLane128(rV, 0));
         putYMMRegLane128(rG, 1,      getYMMRegLane128(rV, 1));
         putYMMRegLane128(rG, ib & 1, mkexpr(t128));
         goto decode_success;
      }
      break;
      /* VEXTRACTI128 $lane_no, rS, r/m
         ::: r/m:V128 = a lane of rS:V256 (RM format) */
      /* VEXTRACTI128 = VEX.256.66.0F3A.W0 39 /r ib */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
         UChar  modrm = getUChar(delta);
         UInt   rS    = gregOfRexRM(pfx, modrm);
         IRTemp t128  = newTemp(Ity_V128);
         if (epartIsReg(modrm)) {
            UInt rD = eregOfRexRM(pfx, modrm);
            ib = getUChar(delta);
            assign(t128, getYMMRegLane128(rS, ib & 1));
            putYMMRegLoAndZU(rD, mkexpr(t128));
            DIP("vextracti128 $%u,%s,%s\n",
                ib, nameXMMReg(rS), nameYMMReg(rD));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            ib = getUChar(delta);
            assign(t128, getYMMRegLane128(rS, ib & 1));
            storeLE(mkexpr(addr), mkexpr(t128));
            DIP("vextracti128 $%u,%s,%s\n",
                ib, nameYMMReg(rS), dis_buf);
         }
         /* doesn't use vvvv */
         goto decode_success;
      }
      break;
      /* VDPPS imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 40 /r ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar  modrm   = getUChar(delta);
         UInt   rG      = gregOfRexRM(pfx, modrm);
         UInt   rV      = getVexNvvvv(pfx);
         IRTemp dst_vec = newTemp(Ity_V128);

         if (epartIsReg( modrm )) {
            UInt rE = eregOfRexRM(pfx,modrm);
            imm8 = (Int)getUChar(delta+1);
            assign( dst_vec, getXMMReg( rE ) );
            DIP( "vdpps $%d,%s,%s,%s\n",
                 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            imm8 = (Int)getUChar(delta+alen);
            assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
            DIP( "vdpps $%d,%s,%s,%s\n",
                 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
         }

         IRTemp src_vec = newTemp(Ity_V128);
         assign(src_vec, getXMMReg( rV ));
         IRTemp res_vec = math_DPPS_128( src_vec, dst_vec, imm8 );
         putYMMRegLoAndZU( rG, mkexpr(res_vec) );
         goto decode_success;
      }
      break;
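      /* For orientation: in DPPS the high nibble of imm8 masks which float
         lanes take part in the dot product and the low nibble masks which
         result lanes receive the sum (the rest become 0.0).  For example,
         imm8 = 0xF1 multiplies and sums all four lanes but writes the sum
         only to lane 0.  That selection logic is assumed to be implemented
         by math_DPPS_128. */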
      /* VDPPS imm8, ymm3/m256,ymm2,ymm1 = VEX.NDS.256.66.0F3A.WIG 40 /r ib */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar  modrm   = getUChar(delta);
         UInt   rG      = gregOfRexRM(pfx, modrm);
         UInt   rV      = getVexNvvvv(pfx);
         IRTemp dst_vec = newTemp(Ity_V256);

         if (epartIsReg( modrm )) {
            UInt rE = eregOfRexRM(pfx,modrm);
            imm8 = (Int)getUChar(delta+1);
            assign( dst_vec, getYMMReg( rE ) );
            DIP( "vdpps $%d,%s,%s,%s\n",
                 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            imm8 = (Int)getUChar(delta+alen);
            assign( dst_vec, loadLE( Ity_V256, mkexpr(addr) ) );
            DIP( "vdpps $%d,%s,%s,%s\n",
                 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
         }

         IRTemp src_vec = newTemp(Ity_V256);
         assign(src_vec, getYMMReg( rV ));
         IRTemp s0, s1, d0, d1;
         s0 = s1 = d0 = d1 = IRTemp_INVALID;
         breakupV256toV128s( dst_vec, &d1, &d0 );
         breakupV256toV128s( src_vec, &s1, &s0 );
         putYMMReg( rG, binop( Iop_V128HLtoV256,
                               mkexpr( math_DPPS_128(s1, d1, imm8) ),
                               mkexpr( math_DPPS_128(s0, d0, imm8) ) ) );
         goto decode_success;
      }
      break;
      /* VDPPD imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 41 /r ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar  modrm   = getUChar(delta);
         UInt   rG      = gregOfRexRM(pfx, modrm);
         UInt   rV      = getVexNvvvv(pfx);
         IRTemp dst_vec = newTemp(Ity_V128);

         if (epartIsReg( modrm )) {
            UInt rE = eregOfRexRM(pfx,modrm);
            imm8 = (Int)getUChar(delta+1);
            assign( dst_vec, getXMMReg( rE ) );
            DIP( "vdppd $%d,%s,%s,%s\n",
                 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            imm8 = (Int)getUChar(delta+alen);
            assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
            DIP( "vdppd $%d,%s,%s,%s\n",
                 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
         }

         IRTemp src_vec = newTemp(Ity_V128);
         assign(src_vec, getXMMReg( rV ));
         IRTemp res_vec = math_DPPD_128( src_vec, dst_vec, imm8 );
         putYMMRegLoAndZU( rG, mkexpr(res_vec) );
         goto decode_success;
      }
      break;
      /* VMPSADBW imm8, xmm3/m128,xmm2,xmm1 */
      /* VMPSADBW = VEX.NDS.128.66.0F3A.WIG 42 /r ib */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar  modrm   = getUChar(delta);
         IRTemp src_vec = newTemp(Ity_V128);
         IRTemp dst_vec = newTemp(Ity_V128);
         UInt   rG      = gregOfRexRM(pfx, modrm);
         UInt   rV      = getVexNvvvv(pfx);

         assign( dst_vec, getXMMReg(rV) );

         if ( epartIsReg( modrm ) ) {
            UInt rE = eregOfRexRM(pfx, modrm);
            imm8 = (Int)getUChar(delta+1);
            assign( src_vec, getXMMReg(rE) );
            DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
                 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
                             1/* imm8 is 1 byte after the amode */ );
            assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
            imm8 = (Int)getUChar(delta+alen);
            DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
                 dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
         }

         putYMMRegLoAndZU( rG, mkexpr( math_MPSADBW_128(dst_vec,
                                                        src_vec, imm8) ) );
         goto decode_success;
      }
      break;
      /* VMPSADBW imm8, ymm3/m256,ymm2,ymm1 */
      /* VMPSADBW = VEX.NDS.256.66.0F3A.WIG 42 /r ib */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         UChar  modrm   = getUChar(delta);
         IRTemp src_vec = newTemp(Ity_V256);
         IRTemp dst_vec = newTemp(Ity_V256);
         UInt   rG      = gregOfRexRM(pfx, modrm);
         UInt   rV      = getVexNvvvv(pfx);
         IRTemp sHi, sLo, dHi, dLo;
         sHi = sLo = dHi = dLo = IRTemp_INVALID;

         assign( dst_vec, getYMMReg(rV) );

         if ( epartIsReg( modrm ) ) {
            UInt rE = eregOfRexRM(pfx, modrm);
            imm8 = (Int)getUChar(delta+1);
            assign( src_vec, getYMMReg(rE) );
            DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
                 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
                             1/* imm8 is 1 byte after the amode */ );
            assign( src_vec, loadLE( Ity_V256, mkexpr(addr) ) );
            imm8 = (Int)getUChar(delta+alen);
            DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
                 dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
         }

         breakupV256toV128s( dst_vec, &dHi, &dLo );
         breakupV256toV128s( src_vec, &sHi, &sLo );
         putYMMReg( rG, binop( Iop_V128HLtoV256,
                               mkexpr( math_MPSADBW_128(dHi, sHi, imm8 >> 3) ),
                               mkexpr( math_MPSADBW_128(dLo, sLo, imm8) ) ) );
         goto decode_success;
      }
      break;
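      /* The asymmetric immediates above reflect the AVX2 encoding: the low
         128-bit lane of VMPSADBW is controlled only by imm8 bits 2:0, while
         the high lane is controlled by bits 5:3, so shifting imm8 right by
         3 lets the same 128-bit helper serve both halves. */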
      /* VPCLMULQDQ imm8, xmm3/m128,xmm2,xmm1 */
      /* VPCLMULQDQ = VEX.NDS.128.66.0F3A.WIG 44 /r ib */
      /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
       * Carry-less multiplication of selected XMM quadwords into XMM
       * registers (a.k.a multiplication of polynomials over GF(2))
       */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         UChar  modrm = getUChar(delta);
         IRTemp sV    = newTemp(Ity_V128);
         IRTemp dV    = newTemp(Ity_V128);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);

         assign( dV, getXMMReg(rV) );

         if ( epartIsReg( modrm ) ) {
            UInt rE = eregOfRexRM(pfx, modrm);
            imm8 = (Int)getUChar(delta+1);
            assign( sV, getXMMReg(rE) );
            DIP( "vpclmulqdq $%d, %s,%s,%s\n", imm8,
                 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
                             1/* imm8 is 1 byte after the amode */ );
            assign( sV, loadLE( Ity_V128, mkexpr(addr) ) );
            imm8 = (Int)getUChar(delta+alen);
            DIP( "vpclmulqdq $%d, %s,%s,%s\n",
                 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
         }

         putYMMRegLoAndZU( rG, mkexpr( math_PCLMULQDQ(dV, sV, imm8) ) );
         goto decode_success;
      }
      break;
      /* VPERM2I128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 46 /r ib */
      if (have66noF2noF3(pfx)
          && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
         UChar  modrm = getUChar(delta);
         UInt   rG    = gregOfRexRM(pfx, modrm);
         UInt   rV    = getVexNvvvv(pfx);
         IRTemp s00   = newTemp(Ity_V128);
         IRTemp s01   = newTemp(Ity_V128);
         IRTemp s10   = newTemp(Ity_V128);
         IRTemp s11   = newTemp(Ity_V128);
         assign(s00, getYMMRegLane128(rV, 0));
         assign(s01, getYMMRegLane128(rV, 1));
         if (epartIsReg(modrm)) {
            UInt rE = eregOfRexRM(pfx, modrm);
            imm8 = getUChar(delta);
            DIP("vperm2i128 $%u,%s,%s,%s\n",
                imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
            assign(s10, getYMMRegLane128(rE, 0));
            assign(s11, getYMMRegLane128(rE, 1));
         } else {
            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
            imm8 = getUChar(delta);
            DIP("vperm2i128 $%u,%s,%s,%s\n",
                imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
            assign(s10, loadLE(Ity_V128, binop(Iop_Add64,
                                               mkexpr(addr), mkU64(0))));
            assign(s11, loadLE(Ity_V128, binop(Iop_Add64,
                                               mkexpr(addr), mkU64(16))));
         }
#        define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
                                           : ((_nn)==2) ? s10 : s11)
         putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3)));
         putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3)));
#        undef SEL
         if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0));
         if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0));
         goto decode_success;
      }
      break;
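      /* Example of the selector above: imm8 = 0x20 gives SEL(0) = s00 (the
         low lane of the vvvv register) as result lane 0 and SEL(2) = s10
         (the low lane of the E operand) as result lane 1 - the classic
         "concatenate the two low halves" pattern.  Setting bit 3 or bit 7
         instead forces the corresponding result lane to zero. */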
      /* VBLENDVPS xmmG, xmmE/memE, xmmV, xmmIS4
         ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
      /* VBLENDVPS = VEX.NDS.128.66.0F3A.WIG 4A /r /is4 */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VBLENDV_128 ( vbi, pfx, delta,
                                   "vblendvps", 4, Iop_SarN32x4 );
         goto decode_success;
      }
      /* VBLENDVPS ymmG, ymmE/memE, ymmV, ymmIS4
         ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
      /* VBLENDVPS = VEX.NDS.256.66.0F3A.WIG 4A /r /is4 */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VBLENDV_256 ( vbi, pfx, delta,
                                   "vblendvps", 4, Iop_SarN32x4 );
         goto decode_success;
      }
      break;
      /* VBLENDVPD xmmG, xmmE/memE, xmmV, xmmIS4
         ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
      /* VBLENDVPD = VEX.NDS.128.66.0F3A.WIG 4B /r /is4 */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VBLENDV_128 ( vbi, pfx, delta,
                                   "vblendvpd", 8, Iop_SarN64x2 );
         goto decode_success;
      }
      /* VBLENDVPD ymmG, ymmE/memE, ymmV, ymmIS4
         ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
      /* VBLENDVPD = VEX.NDS.256.66.0F3A.WIG 4B /r /is4 */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VBLENDV_256 ( vbi, pfx, delta,
                                   "vblendvpd", 8, Iop_SarN64x2 );
         goto decode_success;
      }
      break;
      /* VPBLENDVB xmmG, xmmE/memE, xmmV, xmmIS4
         ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
      /* VPBLENDVB = VEX.NDS.128.66.0F3A.WIG 4C /r /is4 */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_VBLENDV_128 ( vbi, pfx, delta,
                                   "vpblendvb", 1, Iop_SarN8x16 );
         goto decode_success;
      }
      /* VPBLENDVB ymmG, ymmE/memE, ymmV, ymmIS4
         ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
      /* VPBLENDVB = VEX.NDS.256.66.0F3A.WIG 4C /r /is4 */
      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
         delta = dis_VBLENDV_256 ( vbi, pfx, delta,
                                   "vpblendvb", 1, Iop_SarN8x16 );
         goto decode_success;
      }
      break;
      /* VEX.128.66.0F3A.WIG 63 /r ib = VPCMPISTRI imm8, xmm2/m128, xmm1
         VEX.128.66.0F3A.WIG 62 /r ib = VPCMPISTRM imm8, xmm2/m128, xmm1
         VEX.128.66.0F3A.WIG 61 /r ib = VPCMPESTRI imm8, xmm2/m128, xmm1
         VEX.128.66.0F3A.WIG 60 /r ib = VPCMPESTRM imm8, xmm2/m128, xmm1
         (selected special cases that actually occur in glibc,
          not by any means a complete implementation.)
      */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         Long delta0 = delta;
         delta = dis_PCMPxSTRx( vbi, pfx, delta, True/*isAvx*/, opc );
         if (delta > delta0) goto decode_success;
         /* else fall though; dis_PCMPxSTRx failed to decode it */
      }
      break;
   case 0x5C ... 0x5F:
   case 0x68 ... 0x6F:
   case 0x78 ... 0x7F:
      /* FIXME: list the instructions decoded here */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         Long delta0 = delta;
         delta = dis_FMA4( pfx, delta, opc, uses_vvvv, vbi );
         if (delta > delta0) {
            dres->hint = Dis_HintVerbose;
            goto decode_success;
         }
         /* else fall though; dis_FMA4 failed to decode it */
      }
      break;
      /* VAESKEYGENASSIST imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG DF /r */
      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
         delta = dis_AESKEYGENASSIST( vbi, pfx, delta, True/*isAvx*/ );
         goto decode_success;
      }
      break;
      /* RORX imm8, r/m32, r32a = VEX.LZ.F2.0F3A.W0 F0 /r /i */
      /* RORX imm8, r/m64, r64a = VEX.LZ.F2.0F3A.W1 F0 /r /i */
      if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
         Int     size = getRexW(pfx) ? 8 : 4;
         IRType  ty   = szToITy(size);
         IRTemp  src  = newTemp(ty);
         UChar   rm   = getUChar(delta);

         if (epartIsReg(rm)) {
            imm8 = getUChar(delta+1);
            assign( src, getIRegE(size,pfx,rm) );
            DIP("rorx %d,%s,%s\n", imm8, nameIRegE(size,pfx,rm),
                                   nameIRegG(size,pfx,rm));
         } else {
            addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
            imm8 = getUChar(delta+alen);
            assign( src, loadLE(ty, mkexpr(addr)) );
            DIP("rorx %d,%s,%s\n", imm8, dis_buf, nameIRegG(size,pfx,rm));
         }

         /* dst = (src >>u imm8) | (src << (size-imm8)) */
         putIRegG( size, pfx, rm,
                   imm8 == 0 ? mkexpr(src)
                     : binop( mkSizedOp(ty,Iop_Or8),
                              binop( mkSizedOp(ty,Iop_Shr8), mkexpr(src),
                                     mkU8(imm8) ),
                              binop( mkSizedOp(ty,Iop_Shl8), mkexpr(src),
                                     mkU8(8*size-imm8) ) ) );
         /* Flags aren't modified. */
         goto decode_success;
      }
      break;
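      /* Sanity check of the rotate-right identity used above, for the
         32-bit case (size == 4): with src = 0x12345678 and imm8 = 8,
         (src >>u 8) = 0x00123456 and (src << 24) = 0x78000000, whose OR is
         0x78123456 - the expected rorx result.  The imm8 == 0 special case
         presumably exists to avoid a shift by the full operand width. */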
/*------------------------------------------------------------*/
/*--- Disassemble a single instruction                     ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction is
   located in host memory at &guest_code[delta]. */
DisResult disInstr_AMD64_WRK (
             /*OUT*/Bool* expect_CAS,
             Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
             Bool         resteerCisOk,
             void*        callback_opaque,
             Long         delta64,
             const VexArchInfo* archinfo,
             const VexAbiInfo*  vbi,
             Bool         sigill_diag
          )
{
   /* The running delta */
   Long delta = delta64;
   /* Holds eip at the start of the insn, so that we can print
      consistent error messages for unimplemented insns. */
   Long delta_start = delta;

   /* sz denotes the nominal data-op size of the insn; we change it to
      2 if an 0x66 prefix is seen and 8 if REX.W is 1.  In case of
      conflict REX.W takes precedence. */
   Int sz = 4;

   /* pfx holds the summary of prefixes. */
   Prefix pfx = PFX_EMPTY;

   /* Holds the computed opcode-escape indication. */
   Escape esc = ESC_NONE;

   /* Set result defaults. */
   dres.whatNext    = Dis_Continue;
   dres.len         = 0;
   dres.continueAt  = 0;
   dres.jk_StopHere = Ijk_INVALID;
   dres.hint        = Dis_HintNone;
   *expect_CAS      = False;

   vassert(guest_RIP_next_assumed == 0);
   vassert(guest_RIP_next_mustcheck == False);

   t1 = t2 = IRTemp_INVALID;

   DIP("\t0x%llx: ", guest_RIP_bbstart+delta);
   /* Spot "Special" instructions (see comment at top of file). */
   {
      const UChar* code = guest_code + delta;
      /* Spot the 16-byte preamble:
         48C1C703   rolq $3,  %rdi
         48C1C70D   rolq $13, %rdi
         48C1C73D   rolq $61, %rdi
         48C1C733   rolq $51, %rdi
      */
      if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7
                           && code[ 3] == 0x03 &&
          code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7
                           && code[ 7] == 0x0D &&
          code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7
                           && code[11] == 0x3D &&
          code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7
                           && code[15] == 0x33) {
         /* Got a "Special" instruction preamble.  Which one is it? */
         if (code[16] == 0x48 && code[17] == 0x87
                              && code[18] == 0xDB /* xchgq %rbx,%rbx */) {
            /* %RDX = client_request ( %RAX ) */
            DIP("%%rdx = client_request ( %%rax )\n");
            jmp_lit(&dres, Ijk_ClientReq, guest_RIP_bbstart+delta);
            vassert(dres.whatNext == Dis_StopHere);
            goto decode_success;
         }
         else
         if (code[16] == 0x48 && code[17] == 0x87
                              && code[18] == 0xC9 /* xchgq %rcx,%rcx */) {
            /* %RAX = guest_NRADDR */
            DIP("%%rax = guest_NRADDR\n");
            putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            goto decode_success;
         }
         else
         if (code[16] == 0x48 && code[17] == 0x87
                              && code[18] == 0xD2 /* xchgq %rdx,%rdx */) {
            /* call-noredir *%RAX */
            DIP("call-noredir *%%rax\n");
            t1 = newTemp(Ity_I64);
            assign(t1, getIRegRAX(8));
            t2 = newTemp(Ity_I64);
            assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
            putIReg64(R_RSP, mkexpr(t2));
            storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta));
            jmp_treg(&dres, Ijk_NoRedir, t1);
            vassert(dres.whatNext == Dis_StopHere);
            goto decode_success;
         }
         else
         if (code[16] == 0x48 && code[17] == 0x87
                              && code[18] == 0xff /* xchgq %rdi,%rdi */) {
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn. The reason is that the IRop we're
            // injecting here can change. In which case the translation has to
            // be redone. For ease of handling, we simply invalidate all the
            // time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_RIP_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(19)));
            stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
            dres.whatNext    = Dis_StopHere;
            dres.jk_StopHere = Ijk_InvalICache;
            goto decode_success;
         }
         /* We don't know what it is. */
         goto decode_failure;
      }
   }
   /* Eat prefixes, summarising the result in pfx and sz, and rejecting
      as many invalid combinations as possible. */
   n_prefixes = 0;
   while (True) {
      if (n_prefixes > 7) goto decode_failure;
      pre = getUChar(delta);
      switch (pre) {
         case 0x66: pfx |= PFX_66; break;
         case 0x67: pfx |= PFX_ASO; break;
         case 0xF2: pfx |= PFX_F2; break;
         case 0xF3: pfx |= PFX_F3; break;
         case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break;
         case 0x2E: pfx |= PFX_CS; break;
         case 0x3E: pfx |= PFX_DS; break;
         case 0x26: pfx |= PFX_ES; break;
         case 0x64: pfx |= PFX_FS; break;
         case 0x65: pfx |= PFX_GS; break;
         case 0x36: pfx |= PFX_SS; break;
         case 0x40 ... 0x4F:
            pfx |= PFX_REX;
            if (pre & (1<<3)) pfx |= PFX_REXW;
            if (pre & (1<<2)) pfx |= PFX_REXR;
            if (pre & (1<<1)) pfx |= PFX_REXX;
            if (pre & (1<<0)) pfx |= PFX_REXB;
            break;
         default:
            goto not_a_legacy_prefix;
      }
      n_prefixes++;
      delta++;
   }
  not_a_legacy_prefix:
   /* We've used up all the non-VEX prefixes.  Parse and validate a
      VEX prefix if that's appropriate. */
   if (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX) {
      /* Used temporarily for holding VEX prefixes. */
      UChar vex0 = getUChar(delta);
      if (vex0 == 0xC4) {
         /* 3-byte VEX */
         UChar vex1 = getUChar(delta+1);
         UChar vex2 = getUChar(delta+2);
         delta += 3;
         pfx |= PFX_VEX;
         /* Snarf contents of byte 1 */
         /* R */    pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
         /* X */    pfx |= (vex1 & (1<<6)) ? 0 : PFX_REXX;
         /* B */    pfx |= (vex1 & (1<<5)) ? 0 : PFX_REXB;
         switch (vex1 & 0x1F) {
            case 1: esc = ESC_0F;   break;
            case 2: esc = ESC_0F38; break;
            case 3: esc = ESC_0F3A; break;
            /* Any other m-mmmm field will #UD */
            default: goto decode_failure;
         }
         /* Snarf contents of byte 2 */
         /* W */    pfx |= (vex2 & (1<<7)) ? PFX_REXW : 0;
         /* ~v3 */  pfx |= (vex2 & (1<<6)) ? 0 : PFX_VEXnV3;
         /* ~v2 */  pfx |= (vex2 & (1<<5)) ? 0 : PFX_VEXnV2;
         /* ~v1 */  pfx |= (vex2 & (1<<4)) ? 0 : PFX_VEXnV1;
         /* ~v0 */  pfx |= (vex2 & (1<<3)) ? 0 : PFX_VEXnV0;
         /* L */    pfx |= (vex2 & (1<<2)) ? PFX_VEXL : 0;
         switch (vex2 & 3) {
            case 0: break;
            case 1: pfx |= PFX_66; break;
            case 2: pfx |= PFX_F3; break;
            case 3: pfx |= PFX_F2; break;
            default: vassert(0);
         }
      }
      else if (vex0 == 0xC5) {
         /* 2-byte VEX */
         UChar vex1 = getUChar(delta+1);
         delta += 2;
         pfx |= PFX_VEX;
         /* Snarf contents of byte 1 */
         /* R */    pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
         /* ~v3 */  pfx |= (vex1 & (1<<6)) ? 0 : PFX_VEXnV3;
         /* ~v2 */  pfx |= (vex1 & (1<<5)) ? 0 : PFX_VEXnV2;
         /* ~v1 */  pfx |= (vex1 & (1<<4)) ? 0 : PFX_VEXnV1;
         /* ~v0 */  pfx |= (vex1 & (1<<3)) ? 0 : PFX_VEXnV0;
         /* L */    pfx |= (vex1 & (1<<2)) ? PFX_VEXL : 0;
         switch (vex1 & 3) {
            case 0: break;
            case 1: pfx |= PFX_66; break;
            case 2: pfx |= PFX_F3; break;
            case 3: pfx |= PFX_F2; break;
            default: vassert(0);
         }
      }
   }
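   /* Concrete example, as a sanity check of the field extraction above:
      in the 2-byte prefix C5 F8 the R-bar bit is 1 (so REX.R stays clear),
      the inverted vvvv field is 1111 (vvvv unused), L = 0 (128-bit) and
      pp = 00 (no 66/F2/F3), so the byte sequence "C5 F8 77" decodes as
      vzeroupper. */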
   /* Can't have both VEX and REX */
   if ((pfx & PFX_VEX) && (pfx & PFX_REX))
      goto decode_failure; /* can't have both */

   /* Dump invalid combinations */
   n = 0;
   if (pfx & PFX_F2) n++;
   if (pfx & PFX_F3) n++;
   if (n > 1)
      goto decode_failure; /* can't have both */

   n = 0;
   if (pfx & PFX_CS) n++;
   if (pfx & PFX_DS) n++;
   if (pfx & PFX_ES) n++;
   if (pfx & PFX_FS) n++;
   if (pfx & PFX_GS) n++;
   if (pfx & PFX_SS) n++;
   if (n > 1)
      goto decode_failure; /* multiple seg overrides == illegal */

   /* We have a %fs prefix.  Reject it if there's no evidence in 'vbi'
      that we should accept it. */
   if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_const)
      goto decode_failure;

   /* Ditto for %gs prefixes. */
   if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_const)
      goto decode_failure;
   if (pfx & PFX_66) sz = 2;
   if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8;

   /* Now we should be looking at the primary opcode byte or the
      leading escapes.  Check that any LOCK prefix is actually
      allowed. */
   if (haveLOCK(pfx)) {
      if (can_be_used_with_LOCK_prefix( &guest_code[delta] )) {
         DIP("lock ");
      } else {
         *expect_CAS = False;
         goto decode_failure;
      }
   }
   /* Eat up opcode escape bytes, until we're really looking at the
      primary opcode byte.  But only if there's no VEX present. */
   if (!(pfx & PFX_VEX)) {
      vassert(esc == ESC_NONE);
      pre = getUChar(delta);
      if (pre == 0x0F) {
         delta++;
         pre = getUChar(delta);
         switch (pre) {
            case 0x38: esc = ESC_0F38; delta++; break;
            case 0x3A: esc = ESC_0F3A; delta++; break;
            default:   esc = ESC_0F;            break;
         }
      }
   }

   /* So now we're really really looking at the primary opcode
      byte. */
   Long delta_at_primary_opcode = delta;
   if (!(pfx & PFX_VEX)) {
      /* Handle non-VEX prefixed instructions.  "Legacy" (non-VEX) SSE
         instructions preserve the upper 128 bits of YMM registers;
         iow we can simply ignore the presence of the upper halves of
         these registers. */
      switch (esc) {
         case ESC_NONE:
            delta = dis_ESC_NONE( &dres, expect_CAS,
                                  resteerOkFn, resteerCisOk, callback_opaque,
                                  archinfo, vbi, pfx, sz, delta );
            break;
         case ESC_0F:
            delta = dis_ESC_0F  ( &dres, expect_CAS,
                                  resteerOkFn, resteerCisOk, callback_opaque,
                                  archinfo, vbi, pfx, sz, delta );
            break;
         case ESC_0F38:
            delta = dis_ESC_0F38( &dres,
                                  resteerOkFn, resteerCisOk, callback_opaque,
                                  archinfo, vbi, pfx, sz, delta );
            break;
         case ESC_0F3A:
            delta = dis_ESC_0F3A( &dres,
                                  resteerOkFn, resteerCisOk, callback_opaque,
                                  archinfo, vbi, pfx, sz, delta );
            break;
         default:
            vassert(0);
      }
   }
   else {
      /* VEX prefixed instruction */
      /* Sloppy Intel wording: "An instruction encoded with a VEX.128
         prefix that loads a YMM register operand ..." zeroes out bits
         128 and above of the register. */
      Bool uses_vvvv = False;
      switch (esc) {
         case ESC_0F:
            delta = dis_ESC_0F__VEX ( &dres, &uses_vvvv,
                                      resteerOkFn, resteerCisOk,
                                      callback_opaque,
                                      archinfo, vbi, pfx, sz, delta );
            break;
         case ESC_0F38:
            delta = dis_ESC_0F38__VEX ( &dres, &uses_vvvv,
                                        resteerOkFn, resteerCisOk,
                                        callback_opaque,
                                        archinfo, vbi, pfx, sz, delta );
            break;
         case ESC_0F3A:
            delta = dis_ESC_0F3A__VEX ( &dres, &uses_vvvv,
                                        resteerOkFn, resteerCisOk,
                                        callback_opaque,
                                        archinfo, vbi, pfx, sz, delta );
            break;
         case ESC_NONE:
            /* The presence of a VEX prefix, by Intel definition,
               always implies at least an 0F escape. */
            goto decode_failure;
         default:
            vassert(0);
      }
      /* If the insn doesn't use VEX.vvvv then it must be all ones.
         Check this. */
      if (!uses_vvvv) {
         if (getVexNvvvv(pfx) != 0)
            goto decode_failure;
      }
   }

   vassert(delta - delta_at_primary_opcode >= 0);
   vassert(delta - delta_at_primary_opcode < 16/*let's say*/);

   /* Use delta == delta_at_primary_opcode to denote decode failure.
      This implies that any successful decode must use at least one
      extra byte. */
   if (delta == delta_at_primary_opcode)
      goto decode_failure;
   else
      goto decode_success; /* \o/ */
  decode_failure:
   /* All decode failures end up here. */
   if (sigill_diag) {
      vex_printf("vex amd64->IR: unhandled instruction bytes: "
                 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
                 getUChar(delta_start+0),
                 getUChar(delta_start+1),
                 getUChar(delta_start+2),
                 getUChar(delta_start+3),
                 getUChar(delta_start+4),
                 getUChar(delta_start+5),
                 getUChar(delta_start+6),
                 getUChar(delta_start+7),
                 getUChar(delta_start+8),
                 getUChar(delta_start+9) );
      vex_printf("vex amd64->IR: REX=%d REX.W=%d REX.R=%d REX.X=%d REX.B=%d\n",
                 haveREX(pfx) ? 1 : 0, getRexW(pfx), getRexR(pfx),
                 getRexX(pfx), getRexB(pfx));
      vex_printf("vex amd64->IR: VEX=%d VEX.L=%d VEX.nVVVV=0x%x ESC=%s\n",
                 haveVEX(pfx) ? 1 : 0, getVexL(pfx), getVexNvvvv(pfx),
                 esc==ESC_NONE ? "NONE" :
                 esc==ESC_0F   ? "0F"   :
                 esc==ESC_0F38 ? "0F38" :
                 esc==ESC_0F3A ? "0F3A" : "???");
      vex_printf("vex amd64->IR: PFX.66=%d PFX.F2=%d PFX.F3=%d\n",
                 have66(pfx) ? 1 : 0, haveF2(pfx) ? 1 : 0,
                 haveF3(pfx) ? 1 : 0);
   }

   /* Tell the dispatcher that this insn cannot be decoded, and so has
      not been executed, and (is currently) the next to be executed.
      RIP should be up-to-date since it made so at the start of each
      insn, but nevertheless be paranoid and update it again right
      now. */
   stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
   jmp_lit(&dres, Ijk_NoDecode, guest_RIP_curr_instr);
   vassert(dres.whatNext == Dis_StopHere);
   dres.len = 0;

   /* We also need to say that a CAS is not expected now, regardless
      of what it might have been set to at the start of the function,
      since the IR that we've emitted just above (to synthesis a
      SIGILL) does not involve any CAS, and presumably no other IR has
      been emitted for this (non-decoded) insn. */
   *expect_CAS = False;
   return dres;
  decode_success:
   /* All decode successes end up here. */
   switch (dres.whatNext) {
      case Dis_Continue:
         stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
         break;
      case Dis_ResteerU:
      case Dis_ResteerC:
         stmt( IRStmt_Put( OFFB_RIP, mkU64(dres.continueAt) ) );
         break;
      case Dis_StopHere:
         break;
      default:
         vassert(0);
   }

   dres.len = toUInt(delta - delta_start);
   return dres;
}
/*------------------------------------------------------------*/
/*--- Top-level fn                                          ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */
DisResult disInstr_AMD64 ( IRSB*        irsb_IN,
                           Bool         (*resteerOkFn) ( void*, Addr ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           const UChar* guest_code_IN,
                           Long         delta,
                           Addr         guest_IP,
                           VexArch      guest_arch,
                           const VexArchInfo* archinfo,
                           const VexAbiInfo*  abiinfo,
                           VexEndness   host_endness_IN,
                           Bool         sigill_diag_IN )
{
   Int       i, x1, x2;
   Bool      expect_CAS, has_CAS;
   DisResult dres;

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchAMD64);
   guest_code           = guest_code_IN;
   irsb                 = irsb_IN;
   host_endness         = host_endness_IN;
   guest_RIP_curr_instr = guest_IP;
   guest_RIP_bbstart    = guest_IP - delta;

   /* We'll consult these after doing disInstr_AMD64_WRK. */
   guest_RIP_next_assumed   = 0;
   guest_RIP_next_mustcheck = False;

   x1 = irsb_IN->stmts_used;
   expect_CAS = False;
   dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
                               resteerCisOk, callback_opaque,
                               delta, archinfo, abiinfo, sigill_diag_IN );
   x2 = irsb_IN->stmts_used;

   /* If disInstr_AMD64_WRK tried to figure out the next rip, check it
      got it right.  Failure of this assertion is serious and denotes
      a bug in disInstr. */
   if (guest_RIP_next_mustcheck
       && guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) {
      vex_printf("assumed next %%rip = 0x%llx\n",
                 guest_RIP_next_assumed );
      vex_printf(" actual next %%rip = 0x%llx\n",
                 guest_RIP_curr_instr + dres.len );
      vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
   }

   /* See comment at the top of disInstr_AMD64_WRK for meaning of
      expect_CAS.  Here, we (sanity-)check for the presence/absence of
      IRCAS as directed by the returned expect_CAS value. */
   has_CAS = False;
   for (i = x1; i < x2; i++) {
      if (irsb_IN->stmts[i]->tag == Ist_CAS)
         has_CAS = True;
   }

   if (expect_CAS != has_CAS) {
      /* inconsistency detected.  re-disassemble the instruction so as
         to generate a useful error message; then assert. */
      vex_traceflags |= VEX_TRACE_FE;
      dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
                                  resteerCisOk, callback_opaque,
                                  delta, archinfo, abiinfo, sigill_diag_IN );
      for (i = x1; i < x2; i++) {
         vex_printf("\t\t");
         ppIRStmt(irsb_IN->stmts[i]);
         vex_printf("\n");
      }
      /* Failure of this assertion is serious and denotes a bug in
         disInstr. */
      vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
   }

   return dres;
}
/*------------------------------------------------------------*/
/*--- Unused stuff                                          ---*/
/*------------------------------------------------------------*/

// A potentially more Memcheck-friendly version of gen_LZCNT, if
// this should ever be needed.
//
//static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
//{
//   /* Scheme is simple: propagate the most significant 1-bit into all
//      lower positions in the word.  This gives a word of the form
//      0---01---1.  Now invert it, giving a word of the form
//      1---10---0, then do a population-count idiom (to count the 1s,
//      which is the number of leading zeroes, or the word size if the
//      original word was 0. */
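//   /* Worked example of the scheme, on an 8-bit value for brevity (the
//      code below handles I64/I32/I16, but the idea is the same):
//      src = 00010110b.  OR-ing in successively larger right shifts
//      smears the top 1 downwards, giving 00011111b; inverting gives
//      11100000b, whose population count is 3 - exactly the number of
//      leading zeroes in the original value. */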
//   Int i;
//   IRTemp t[7];
//   for (i = 0; i < 7; i++) {
//      t[i] = newTemp(ty);
//   }
//   if (ty == Ity_I64) {
//      assign(t[0], binop(Iop_Or64, mkexpr(src),
//                         binop(Iop_Shr64, mkexpr(src),  mkU8(1))));
//      assign(t[1], binop(Iop_Or64, mkexpr(t[0]),
//                         binop(Iop_Shr64, mkexpr(t[0]), mkU8(2))));
//      assign(t[2], binop(Iop_Or64, mkexpr(t[1]),
//                         binop(Iop_Shr64, mkexpr(t[1]), mkU8(4))));
//      assign(t[3], binop(Iop_Or64, mkexpr(t[2]),
//                         binop(Iop_Shr64, mkexpr(t[2]), mkU8(8))));
//      assign(t[4], binop(Iop_Or64, mkexpr(t[3]),
//                         binop(Iop_Shr64, mkexpr(t[3]), mkU8(16))));
//      assign(t[5], binop(Iop_Or64, mkexpr(t[4]),
//                         binop(Iop_Shr64, mkexpr(t[4]), mkU8(32))));
//      assign(t[6], unop(Iop_Not64, mkexpr(t[5])));
//      return gen_POPCOUNT(ty, t[6]);
//   }
//   if (ty == Ity_I32) {
//      assign(t[0], binop(Iop_Or32, mkexpr(src),
//                         binop(Iop_Shr32, mkexpr(src),  mkU8(1))));
//      assign(t[1], binop(Iop_Or32, mkexpr(t[0]),
//                         binop(Iop_Shr32, mkexpr(t[0]), mkU8(2))));
//      assign(t[2], binop(Iop_Or32, mkexpr(t[1]),
//                         binop(Iop_Shr32, mkexpr(t[1]), mkU8(4))));
//      assign(t[3], binop(Iop_Or32, mkexpr(t[2]),
//                         binop(Iop_Shr32, mkexpr(t[2]), mkU8(8))));
//      assign(t[4], binop(Iop_Or32, mkexpr(t[3]),
//                         binop(Iop_Shr32, mkexpr(t[3]), mkU8(16))));
//      assign(t[5], unop(Iop_Not32, mkexpr(t[4])));
//      return gen_POPCOUNT(ty, t[5]);
//   }
//   if (ty == Ity_I16) {
//      assign(t[0], binop(Iop_Or16, mkexpr(src),
//                         binop(Iop_Shr16, mkexpr(src),  mkU8(1))));
//      assign(t[1], binop(Iop_Or16, mkexpr(t[0]),
//                         binop(Iop_Shr16, mkexpr(t[0]), mkU8(2))));
//      assign(t[2], binop(Iop_Or16, mkexpr(t[1]),
//                         binop(Iop_Shr16, mkexpr(t[1]), mkU8(4))));
//      assign(t[3], binop(Iop_Or16, mkexpr(t[2]),
//                         binop(Iop_Shr16, mkexpr(t[2]), mkU8(8))));
//      assign(t[4], unop(Iop_Not16, mkexpr(t[3])));
//      return gen_POPCOUNT(ty, t[4]);
//   }
//}

/*--------------------------------------------------------------------*/
/*--- end                                       guest_amd64_toIR.c ---*/
/*--------------------------------------------------------------------*/