2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_amd64_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
28 The GNU General Public License is contained in the file COPYING.
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
36 /* Translates AMD64 code to IR. */
38 /* TODO:
40 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
41 to ensure a 64-bit value is being written.
43 x87 FP Limitations:
45 * all arithmetic done at 64 bits
47 * no FP exceptions, except for handling stack over/underflow
49 * FP rounding mode observed only for float->int conversions and
50 int->float conversions which could lose accuracy, and for
51 float-to-float rounding. For all other operations,
52 round-to-nearest is used, regardless.
54 * some of the FCOM cases could do with testing -- not convinced
55 that the args are the right way round.
57 * FSAVE does not re-initialise the FPU; it should do
59 * FINIT not only initialises the FPU environment, it also zeroes
60 all the FP registers. It should leave the registers unchanged.
62 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
63 per Intel docs this bit has no meaning anyway. Since PUSHF is the
64 only way to observe eflags[1], a proper fix would be to make that
65 bit be set by PUSHF.
67 This module uses global variables and so is not MT-safe (if that
68 should ever become relevant).
71 /* Notes re address size overrides (0x67).
73 According to the AMD documentation (24594 Rev 3.09, Sept 2003,
74 "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
75 and System Instructions"), Section 1.2.3 ("Address-Size Override
76 Prefix"):
78 0x67 applies to all explicit memory references, causing the top
79 32 bits of the effective address to become zero.
81 0x67 has no effect on stack references (push/pop); these always
82 use a 64-bit address.
84 0x67 changes the interpretation of instructions which implicitly
85 reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
86 instead. These are:
88 cmp{s,sb,sw,sd,sq}
89 in{s,sb,sw,sd}
90 jcxz, jecxz, jrcxz
91 lod{s,sb,sw,sd,sq}
92 loop{,e,bz,be,z}
93 mov{s,sb,sw,sd,sq}
94 out{s,sb,sw,sd}
95 rep{,e,ne,nz}
96 sca{s,sb,sw,sd,sq}
97 sto{s,sb,sw,sd,sq}
98 xlat{,b} */
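/* For example, under a 0x67 override an effective address computed
   as 0x00007FFF12345678 is used as 0x0000000012345678 -- the top 32
   bits are simply forced to zero; without the override the address is
   used unchanged. */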
100 /* "Special" instructions.
102 This instruction decoder can decode four special instructions
103 which mean nothing natively (are no-ops as far as regs/mem are
104 concerned) but have meaning for supporting Valgrind. A special
105 instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
106 48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
107 $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
108 Following that, one of the following 4 is allowed (standard
109 interpretation in parentheses):
111 4887DB (xchgq %rbx,%rbx) %RDX = client_request ( %RAX )
112 4887C9 (xchgq %rcx,%rcx) %RAX = guest_NRADDR
113 4887D2 (xchgq %rdx,%rdx) call-noredir *%RAX
114 4887F6 (xchgq %rsi,%rsi) IR injection
116 Any other bytes following the 16-byte preamble are illegal and
117 constitute a failure in instruction decoding. This all assumes
118 that the preamble will never occur except in specific code
119 fragments designed for Valgrind to catch.
121 No prefixes may precede a "Special" instruction.
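/* For example, the complete client-request marker, as emitted by
   Valgrind's client-request macros, is the 19-byte sequence

      48 C1 C7 03 48 C1 C7 0D 48 C1 C7 3D 48 C1 C7 33 48 87 DB

   that is, the 16-byte preamble followed immediately by the first of
   the "xchgq" forms listed above. */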
124 /* casLE (implementation of lock-prefixed insns) and rep-prefixed
125 insns: the side-exit back to the start of the insn is done with
126 Ijk_Boring. This is quite wrong, it should be done with
127 Ijk_NoRedir, since otherwise the side exit, which is intended to
128 restart the instruction for whatever reason, could go somewhere
129 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
130 no-redir jumps performance critical, at least for rep-prefixed
131 instructions, since all iterations thereof would involve such a
132 jump. It's not such a big deal with casLE since the side exit is
133 only taken if the CAS fails, that is, the location is contended,
134 which is relatively unlikely.
136 Note also, the test for CAS success vs failure is done using
137 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
138 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
139 shouldn't definedness-check these comparisons. See
140 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
141 background/rationale.
144 /* LOCK prefixed instructions. These are translated using IR-level
145 CAS statements (IRCAS) and are believed to preserve atomicity, even
146 from the point of view of some other process racing against a
147 simulated one (presumably they communicate via a shared memory
148 segment).
150 Handlers which are aware of LOCK prefixes are:
151 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
152 dis_cmpxchg_G_E (cmpxchg)
153 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
154 dis_Grp3 (not, neg)
155 dis_Grp4 (inc, dec)
156 dis_Grp5 (inc, dec)
157 dis_Grp8_Imm (bts, btc, btr)
158 dis_bt_G_E (bts, btc, btr)
159 dis_xadd_G_E (xadd)
163 #include "libvex_basictypes.h"
164 #include "libvex_ir.h"
165 #include "libvex.h"
166 #include "libvex_guest_amd64.h"
168 #include "main_util.h"
169 #include "main_globals.h"
170 #include "guest_generic_bb_to_IR.h"
171 #include "guest_generic_x87.h"
172 #include "guest_amd64_defs.h"
175 /*------------------------------------------------------------*/
176 /*--- Globals ---*/
177 /*------------------------------------------------------------*/
179 /* These are set at the start of the translation of an insn, right
180 down in disInstr_AMD64, so that we don't have to pass them around
181 endlessly. They are all constant during the translation of any
182 given insn. */
184 /* These are set at the start of the translation of a BB, so
185 that we don't have to pass them around endlessly. */
187 /* We need to know this to do sub-register accesses correctly. */
188 static VexEndness host_endness;
190 /* Pointer to the guest code area (points to start of BB, not to the
191 insn being processed). */
192 static const UChar* guest_code;
194 /* The guest address corresponding to guest_code[0]. */
195 static Addr64 guest_RIP_bbstart;
197 /* The guest address for the instruction currently being
198 translated. */
199 static Addr64 guest_RIP_curr_instr;
201 /* The IRSB* into which we're generating code. */
202 static IRSB* irsb;
204 /* For ensuring that %rip-relative addressing is done right. A read
205 of %rip generates the address of the next instruction. It may be
206 that we don't conveniently know that inside disAMode(). For sanity
207 checking, if the next insn %rip is needed, we make a guess at what
208 it is, record that guess here, and set the accompanying Bool to
209 indicate that -- after this insn's decode is finished -- that guess
210 needs to be checked. */
212 /* At the start of each insn decode, this pair is set to (0, False).
213 After the decode, if _mustcheck is now True, _assumed is
214 checked. */
216 static Addr64 guest_RIP_next_assumed;
217 static Bool guest_RIP_next_mustcheck;
220 /*------------------------------------------------------------*/
221 /*--- Helpers for constructing IR. ---*/
222 /*------------------------------------------------------------*/
224 /* Generate a new temporary of the given type. */
225 static IRTemp newTemp ( IRType ty )
227 vassert(isPlausibleIRType(ty));
228 return newIRTemp( irsb->tyenv, ty );
231 /* Add a statement to the list held by "irsb". */
232 static void stmt ( IRStmt* st )
234 addStmtToIRSB( irsb, st );
237 /* Generate a statement "dst := e". */
238 static void assign ( IRTemp dst, IRExpr* e )
240 stmt( IRStmt_WrTmp(dst, e) );
243 static IRExpr* unop ( IROp op, IRExpr* a )
245 return IRExpr_Unop(op, a);
248 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
250 return IRExpr_Binop(op, a1, a2);
253 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
255 return IRExpr_Triop(op, a1, a2, a3);
258 static IRExpr* mkexpr ( IRTemp tmp )
260 return IRExpr_RdTmp(tmp);
263 static IRExpr* mkU8 ( ULong i )
265 vassert(i < 256);
266 return IRExpr_Const(IRConst_U8( (UChar)i ));
269 static IRExpr* mkU16 ( ULong i )
271 vassert(i < 0x10000ULL);
272 return IRExpr_Const(IRConst_U16( (UShort)i ));
275 static IRExpr* mkU32 ( ULong i )
277 vassert(i < 0x100000000ULL);
278 return IRExpr_Const(IRConst_U32( (UInt)i ));
281 static IRExpr* mkU64 ( ULong i )
283 return IRExpr_Const(IRConst_U64(i));
286 static IRExpr* mkU ( IRType ty, ULong i )
288 switch (ty) {
289 case Ity_I8: return mkU8(i);
290 case Ity_I16: return mkU16(i);
291 case Ity_I32: return mkU32(i);
292 case Ity_I64: return mkU64(i);
293 default: vpanic("mkU(amd64)");
297 static void storeLE ( IRExpr* addr, IRExpr* data )
299 stmt( IRStmt_Store(Iend_LE, addr, data) );
302 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
304 return IRExpr_Load(Iend_LE, ty, addr);
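/* A minimal sketch of how these combinators compose, assuming 'addr'
   is some Ity_I64 expression for a guest address: load a 64-bit value,
   add a constant, and store the result back. */
#if 0
{
   IRTemp t = newTemp(Ity_I64);
   assign( t, binop(Iop_Add64, loadLE(Ity_I64, addr), mkU64(42)) );
   storeLE( addr, mkexpr(t) );
}
#endif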
307 static IROp mkSizedOp ( IRType ty, IROp op8 )
309 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
310 || op8 == Iop_Mul8
311 || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
312 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
313 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
314 || op8 == Iop_CasCmpNE8
315 || op8 == Iop_Not8 );
316 switch (ty) {
317 case Ity_I8: return 0 +op8;
318 case Ity_I16: return 1 +op8;
319 case Ity_I32: return 2 +op8;
320 case Ity_I64: return 3 +op8;
321 default: vpanic("mkSizedOp(amd64)");
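/* This relies on the 8-, 16-, 32- and 64-bit variants of each
   operation being declared adjacently in the IROp enumeration, so
   that, for example, mkSizedOp(Ity_I32, Iop_Add8) == Iop_Add32 and
   mkSizedOp(Ity_I64, Iop_Xor8) == Iop_Xor64. */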
325 static
326 IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
328 if (szSmall == 1 && szBig == 4) {
329 return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
331 if (szSmall == 1 && szBig == 2) {
332 return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
334 if (szSmall == 2 && szBig == 4) {
335 return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
337 if (szSmall == 1 && szBig == 8 && !signd) {
338 return unop(Iop_8Uto64, src);
340 if (szSmall == 1 && szBig == 8 && signd) {
341 return unop(Iop_8Sto64, src);
343 if (szSmall == 2 && szBig == 8 && !signd) {
344 return unop(Iop_16Uto64, src);
346 if (szSmall == 2 && szBig == 8 && signd) {
347 return unop(Iop_16Sto64, src);
349 vpanic("doScalarWidening(amd64)");
352 static
353 void putGuarded ( Int gstOffB, IRExpr* guard, IRExpr* value )
355 IRType ty = typeOfIRExpr(irsb->tyenv, value);
356 stmt( IRStmt_Put(gstOffB,
357 IRExpr_ITE(guard, value, IRExpr_Get(gstOffB, ty))) );
361 /*------------------------------------------------------------*/
362 /*--- Debugging output ---*/
363 /*------------------------------------------------------------*/
365 /* Bomb out if we can't handle something. */
366 __attribute__ ((noreturn))
367 static void unimplemented ( const HChar* str )
369 vex_printf("amd64toIR: unimplemented feature\n");
370 vpanic(str);
373 #define DIP(format, args...) \
374 if (vex_traceflags & VEX_TRACE_FE) \
375 vex_printf(format, ## args)
377 #define DIS(buf, format, args...) \
378 if (vex_traceflags & VEX_TRACE_FE) \
379 vex_sprintf(buf, format, ## args)
382 /*------------------------------------------------------------*/
383 /*--- Offsets of various parts of the amd64 guest state. ---*/
384 /*------------------------------------------------------------*/
386 #define OFFB_RAX offsetof(VexGuestAMD64State,guest_RAX)
387 #define OFFB_RBX offsetof(VexGuestAMD64State,guest_RBX)
388 #define OFFB_RCX offsetof(VexGuestAMD64State,guest_RCX)
389 #define OFFB_RDX offsetof(VexGuestAMD64State,guest_RDX)
390 #define OFFB_RSP offsetof(VexGuestAMD64State,guest_RSP)
391 #define OFFB_RBP offsetof(VexGuestAMD64State,guest_RBP)
392 #define OFFB_RSI offsetof(VexGuestAMD64State,guest_RSI)
393 #define OFFB_RDI offsetof(VexGuestAMD64State,guest_RDI)
394 #define OFFB_R8 offsetof(VexGuestAMD64State,guest_R8)
395 #define OFFB_R9 offsetof(VexGuestAMD64State,guest_R9)
396 #define OFFB_R10 offsetof(VexGuestAMD64State,guest_R10)
397 #define OFFB_R11 offsetof(VexGuestAMD64State,guest_R11)
398 #define OFFB_R12 offsetof(VexGuestAMD64State,guest_R12)
399 #define OFFB_R13 offsetof(VexGuestAMD64State,guest_R13)
400 #define OFFB_R14 offsetof(VexGuestAMD64State,guest_R14)
401 #define OFFB_R15 offsetof(VexGuestAMD64State,guest_R15)
403 #define OFFB_RIP offsetof(VexGuestAMD64State,guest_RIP)
405 #define OFFB_FS_CONST offsetof(VexGuestAMD64State,guest_FS_CONST)
406 #define OFFB_GS_CONST offsetof(VexGuestAMD64State,guest_GS_CONST)
408 #define OFFB_CC_OP offsetof(VexGuestAMD64State,guest_CC_OP)
409 #define OFFB_CC_DEP1 offsetof(VexGuestAMD64State,guest_CC_DEP1)
410 #define OFFB_CC_DEP2 offsetof(VexGuestAMD64State,guest_CC_DEP2)
411 #define OFFB_CC_NDEP offsetof(VexGuestAMD64State,guest_CC_NDEP)
413 #define OFFB_FPREGS offsetof(VexGuestAMD64State,guest_FPREG[0])
414 #define OFFB_FPTAGS offsetof(VexGuestAMD64State,guest_FPTAG[0])
415 #define OFFB_DFLAG offsetof(VexGuestAMD64State,guest_DFLAG)
416 #define OFFB_ACFLAG offsetof(VexGuestAMD64State,guest_ACFLAG)
417 #define OFFB_IDFLAG offsetof(VexGuestAMD64State,guest_IDFLAG)
418 #define OFFB_FTOP offsetof(VexGuestAMD64State,guest_FTOP)
419 #define OFFB_FC3210 offsetof(VexGuestAMD64State,guest_FC3210)
420 #define OFFB_FPROUND offsetof(VexGuestAMD64State,guest_FPROUND)
422 #define OFFB_SSEROUND offsetof(VexGuestAMD64State,guest_SSEROUND)
423 #define OFFB_YMM0 offsetof(VexGuestAMD64State,guest_YMM0)
424 #define OFFB_YMM1 offsetof(VexGuestAMD64State,guest_YMM1)
425 #define OFFB_YMM2 offsetof(VexGuestAMD64State,guest_YMM2)
426 #define OFFB_YMM3 offsetof(VexGuestAMD64State,guest_YMM3)
427 #define OFFB_YMM4 offsetof(VexGuestAMD64State,guest_YMM4)
428 #define OFFB_YMM5 offsetof(VexGuestAMD64State,guest_YMM5)
429 #define OFFB_YMM6 offsetof(VexGuestAMD64State,guest_YMM6)
430 #define OFFB_YMM7 offsetof(VexGuestAMD64State,guest_YMM7)
431 #define OFFB_YMM8 offsetof(VexGuestAMD64State,guest_YMM8)
432 #define OFFB_YMM9 offsetof(VexGuestAMD64State,guest_YMM9)
433 #define OFFB_YMM10 offsetof(VexGuestAMD64State,guest_YMM10)
434 #define OFFB_YMM11 offsetof(VexGuestAMD64State,guest_YMM11)
435 #define OFFB_YMM12 offsetof(VexGuestAMD64State,guest_YMM12)
436 #define OFFB_YMM13 offsetof(VexGuestAMD64State,guest_YMM13)
437 #define OFFB_YMM14 offsetof(VexGuestAMD64State,guest_YMM14)
438 #define OFFB_YMM15 offsetof(VexGuestAMD64State,guest_YMM15)
439 #define OFFB_YMM16 offsetof(VexGuestAMD64State,guest_YMM16)
441 #define OFFB_EMNOTE offsetof(VexGuestAMD64State,guest_EMNOTE)
442 #define OFFB_CMSTART offsetof(VexGuestAMD64State,guest_CMSTART)
443 #define OFFB_CMLEN offsetof(VexGuestAMD64State,guest_CMLEN)
445 #define OFFB_NRADDR offsetof(VexGuestAMD64State,guest_NRADDR)
448 /*------------------------------------------------------------*/
449 /*--- Helper bits and pieces for deconstructing the ---*/
450 /*--- amd64 insn stream. ---*/
451 /*------------------------------------------------------------*/
453 /* This is the AMD64 register encoding -- integer regs. */
454 #define R_RAX 0
455 #define R_RCX 1
456 #define R_RDX 2
457 #define R_RBX 3
458 #define R_RSP 4
459 #define R_RBP 5
460 #define R_RSI 6
461 #define R_RDI 7
462 #define R_R8 8
463 #define R_R9 9
464 #define R_R10 10
465 #define R_R11 11
466 #define R_R12 12
467 #define R_R13 13
468 #define R_R14 14
469 #define R_R15 15
471 /* This is the Intel register encoding -- segment regs. */
472 #define R_ES 0
473 #define R_CS 1
474 #define R_SS 2
475 #define R_DS 3
476 #define R_FS 4
477 #define R_GS 5
480 /* Various simple conversions */
482 static ULong extend_s_8to64 ( UChar x )
484 return (ULong)((Long)(((ULong)x) << 56) >> 56);
487 static ULong extend_s_16to64 ( UShort x )
489 return (ULong)((Long)(((ULong)x) << 48) >> 48);
492 static ULong extend_s_32to64 ( UInt x )
494 return (ULong)((Long)(((ULong)x) << 32) >> 32);
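/* Worked examples:
      extend_s_8to64 ( 0x7F )   == 0x000000000000007FULL
      extend_s_8to64 ( 0x80 )   == 0xFFFFFFFFFFFFFF80ULL
      extend_s_16to64( 0xFFFF ) == 0xFFFFFFFFFFFFFFFFULL
   The value is shifted up to the top of a 64-bit word and then
   arithmetically shifted back down, replicating its sign bit. */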
497 /* Figure out whether the mod and rm parts of a modRM byte refer to a
498 register or memory. If so, the byte will have the form 11XXXYYY,
499 where YYY is the register number. */
500 inline
501 static Bool epartIsReg ( UChar mod_reg_rm )
503 return toBool(0xC0 == (mod_reg_rm & 0xC0));
506 /* Extract the 'g' field from a modRM byte. This only produces 3
507 bits, which is not a complete register number. You should avoid
508 this function if at all possible. */
509 inline
510 static Int gregLO3ofRM ( UChar mod_reg_rm )
512 return (Int)( (mod_reg_rm >> 3) & 7 );
515 /* Ditto the 'e' field of a modRM byte. */
516 inline
517 static Int eregLO3ofRM ( UChar mod_reg_rm )
519 return (Int)(mod_reg_rm & 0x7);
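/* Worked example: for the modRM byte 0xD9 == 11 011 001b,
   epartIsReg(0xD9) is True (mod == 11), gregLO3ofRM(0xD9) == 3 and
   eregLO3ofRM(0xD9) == 1.  The REX.R and REX.B bits (see below) are
   still needed to extend these 3-bit fields to full 4-bit register
   numbers. */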
522 /* Get an 8/16/32-bit unsigned value out of the insn stream. */
524 static inline UChar getUChar ( Long delta )
526 UChar v = guest_code[delta+0];
527 return v;
530 static UInt getUDisp16 ( Long delta )
532 UInt v = guest_code[delta+1]; v <<= 8;
533 v |= guest_code[delta+0];
534 return v & 0xFFFF;
537 //.. static UInt getUDisp ( Int size, Long delta )
538 //.. {
539 //.. switch (size) {
540 //.. case 4: return getUDisp32(delta);
541 //.. case 2: return getUDisp16(delta);
542 //.. case 1: return getUChar(delta);
543 //.. default: vpanic("getUDisp(x86)");
544 //.. }
545 //.. return 0; /*notreached*/
546 //.. }
549 /* Get a byte value out of the insn stream and sign-extend to 64
550 bits. */
551 static Long getSDisp8 ( Long delta )
553 return extend_s_8to64( guest_code[delta] );
556 /* Get a 16-bit value out of the insn stream and sign-extend to 64
557 bits. */
558 static Long getSDisp16 ( Long delta )
560 UInt v = guest_code[delta+1]; v <<= 8;
561 v |= guest_code[delta+0];
562 return extend_s_16to64( (UShort)v );
565 /* Get a 32-bit value out of the insn stream and sign-extend to 64
566 bits. */
567 static Long getSDisp32 ( Long delta )
569 UInt v = guest_code[delta+3]; v <<= 8;
570 v |= guest_code[delta+2]; v <<= 8;
571 v |= guest_code[delta+1]; v <<= 8;
572 v |= guest_code[delta+0];
573 return extend_s_32to64( v );
576 /* Get a 64-bit value out of the insn stream. */
577 static Long getDisp64 ( Long delta )
579 ULong v = 0;
580 v |= guest_code[delta+7]; v <<= 8;
581 v |= guest_code[delta+6]; v <<= 8;
582 v |= guest_code[delta+5]; v <<= 8;
583 v |= guest_code[delta+4]; v <<= 8;
584 v |= guest_code[delta+3]; v <<= 8;
585 v |= guest_code[delta+2]; v <<= 8;
586 v |= guest_code[delta+1]; v <<= 8;
587 v |= guest_code[delta+0];
588 return v;
591 /* Note: because AMD64 doesn't allow 64-bit literals, it is an error
592 if this is called with size==8. Should not happen. */
593 static Long getSDisp ( Int size, Long delta )
595 switch (size) {
596 case 4: return getSDisp32(delta);
597 case 2: return getSDisp16(delta);
598 case 1: return getSDisp8(delta);
599 default: vpanic("getSDisp(amd64)");
603 static ULong mkSizeMask ( Int sz )
605 switch (sz) {
606 case 1: return 0x00000000000000FFULL;
607 case 2: return 0x000000000000FFFFULL;
608 case 4: return 0x00000000FFFFFFFFULL;
609 case 8: return 0xFFFFFFFFFFFFFFFFULL;
610 default: vpanic("mkSzMask(amd64)");
614 static Int imin ( Int a, Int b )
616 return (a < b) ? a : b;
619 static IRType szToITy ( Int n )
621 switch (n) {
622 case 1: return Ity_I8;
623 case 2: return Ity_I16;
624 case 4: return Ity_I32;
625 case 8: return Ity_I64;
626 default: vex_printf("\nszToITy(%d)\n", n);
627 vpanic("szToITy(amd64)");
632 /*------------------------------------------------------------*/
633 /*--- For dealing with prefixes. ---*/
634 /*------------------------------------------------------------*/
636 /* The idea is to pass around an int holding a bitmask summarising
637 info from the prefixes seen on the current instruction, including
638 info from the REX byte. This info is used in various places, but
639 most especially when making sense of register fields in
640 instructions.
642 The top 8 bits of the prefix are 0x55, just as a hacky way to
643 ensure it really is a valid prefix.
645 Things you can safely assume about a well-formed prefix:
646 * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
647 * if REX is not present then REXW,REXR,REXX,REXB will read
648 as zero.
649 * F2 and F3 will not both be 1.
652 typedef UInt Prefix;
654 #define PFX_ASO (1<<0) /* address-size override present (0x67) */
655 #define PFX_66 (1<<1) /* operand-size override-to-16 present (0x66) */
656 #define PFX_REX (1<<2) /* REX byte present (0x40 to 0x4F) */
657 #define PFX_REXW (1<<3) /* REX W bit, if REX present, else 0 */
658 #define PFX_REXR (1<<4) /* REX R bit, if REX present, else 0 */
659 #define PFX_REXX (1<<5) /* REX X bit, if REX present, else 0 */
660 #define PFX_REXB (1<<6) /* REX B bit, if REX present, else 0 */
661 #define PFX_LOCK (1<<7) /* bus LOCK prefix present (0xF0) */
662 #define PFX_F2 (1<<8) /* REPNE/REPNZ prefix present (0xF2) */
663 #define PFX_F3 (1<<9) /* REP/REPE/REPZ prefix present (0xF3) */
664 #define PFX_CS (1<<10) /* CS segment prefix present (0x2E) */
665 #define PFX_DS (1<<11) /* DS segment prefix present (0x3E) */
666 #define PFX_ES (1<<12) /* ES segment prefix present (0x26) */
667 #define PFX_FS (1<<13) /* FS segment prefix present (0x64) */
668 #define PFX_GS (1<<14) /* GS segment prefix present (0x65) */
669 #define PFX_SS (1<<15) /* SS segment prefix present (0x36) */
670 #define PFX_VEX (1<<16) /* VEX prefix present (0xC4 or 0xC5) */
671 #define PFX_VEXL (1<<17) /* VEX L bit, if VEX present, else 0 */
672 /* The extra register field VEX.vvvv is encoded (after not-ing it) as
673 PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit
674 positions. */
675 #define PFX_VEXnV0 (1<<18) /* ~VEX vvvv[0], if VEX present, else 0 */
676 #define PFX_VEXnV1 (1<<19) /* ~VEX vvvv[1], if VEX present, else 0 */
677 #define PFX_VEXnV2 (1<<20) /* ~VEX vvvv[2], if VEX present, else 0 */
678 #define PFX_VEXnV3 (1<<21) /* ~VEX vvvv[3], if VEX present, else 0 */
681 #define PFX_EMPTY 0x55000000
683 static Bool IS_VALID_PFX ( Prefix pfx ) {
684 return toBool((pfx & 0xFF000000) == PFX_EMPTY);
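/* Worked example: a lone REX.W byte (0x48) would be summarised as
   (PFX_EMPTY | PFX_REX | PFX_REXW), for which IS_VALID_PFX holds,
   since the top byte is still 0x55. */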
687 static Bool haveREX ( Prefix pfx ) {
688 return toBool(pfx & PFX_REX);
691 static Int getRexW ( Prefix pfx ) {
692 return (pfx & PFX_REXW) ? 1 : 0;
694 static Int getRexR ( Prefix pfx ) {
695 return (pfx & PFX_REXR) ? 1 : 0;
697 static Int getRexX ( Prefix pfx ) {
698 return (pfx & PFX_REXX) ? 1 : 0;
700 static Int getRexB ( Prefix pfx ) {
701 return (pfx & PFX_REXB) ? 1 : 0;
704 /* Check a prefix doesn't have F2 or F3 set in it, since usually that
705 completely changes what instruction it really is. */
706 static Bool haveF2orF3 ( Prefix pfx ) {
707 return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
709 static Bool haveF2andF3 ( Prefix pfx ) {
710 return toBool((pfx & (PFX_F2|PFX_F3)) == (PFX_F2|PFX_F3));
712 static Bool haveF2 ( Prefix pfx ) {
713 return toBool((pfx & PFX_F2) > 0);
715 static Bool haveF3 ( Prefix pfx ) {
716 return toBool((pfx & PFX_F3) > 0);
719 static Bool have66 ( Prefix pfx ) {
720 return toBool((pfx & PFX_66) > 0);
722 static Bool haveASO ( Prefix pfx ) {
723 return toBool((pfx & PFX_ASO) > 0);
725 static Bool haveLOCK ( Prefix pfx ) {
726 return toBool((pfx & PFX_LOCK) > 0);
729 /* Return True iff pfx has 66 set and F2 and F3 clear */
730 static Bool have66noF2noF3 ( Prefix pfx )
732 return
733 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
736 /* Return True iff pfx has F2 set and 66 and F3 clear */
737 static Bool haveF2no66noF3 ( Prefix pfx )
739 return
740 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
743 /* Return True iff pfx has F3 set and 66 and F2 clear */
744 static Bool haveF3no66noF2 ( Prefix pfx )
746 return
747 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
750 /* Return True iff pfx has F3 set and F2 clear */
751 static Bool haveF3noF2 ( Prefix pfx )
753 return
754 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3);
757 /* Return True iff pfx has F2 set and F3 clear */
758 static Bool haveF2noF3 ( Prefix pfx )
760 return
761 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2);
764 /* Return True iff pfx has 66, F2 and F3 clear */
765 static Bool haveNo66noF2noF3 ( Prefix pfx )
767 return
768 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
771 /* Return True iff pfx has any of 66, F2 and F3 set */
772 static Bool have66orF2orF3 ( Prefix pfx )
774 return toBool( ! haveNo66noF2noF3(pfx) );
777 /* Return True iff pfx has 66 or F3 set */
778 static Bool have66orF3 ( Prefix pfx )
780 return toBool((pfx & (PFX_66|PFX_F3)) > 0);
783 /* Clear all the segment-override bits in a prefix. */
784 static Prefix clearSegBits ( Prefix p )
786 return
787 p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
790 /* Get the (inverted, hence back to "normal") VEX.vvvv field. */
791 static UInt getVexNvvvv ( Prefix pfx ) {
792 UInt r = (UInt)pfx;
793 r /= (UInt)PFX_VEXnV0; /* pray this turns into a shift */
794 return r & 0xF;
797 static Bool haveVEX ( Prefix pfx ) {
798 return toBool(pfx & PFX_VEX);
801 static Int getVexL ( Prefix pfx ) {
802 return (pfx & PFX_VEXL) ? 1 : 0;
806 /*------------------------------------------------------------*/
807 /*--- For dealing with escapes ---*/
808 /*------------------------------------------------------------*/
811 /* Escapes come after the prefixes, but before the primary opcode
812 byte. They escape the primary opcode byte into a bigger space.
813 The 0xF0000000 isn't significant, except so as to make it not
814 overlap valid Prefix values, for sanity checking.
817 typedef
818 enum {
819 ESC_NONE=0xF0000000, // none
820 ESC_0F, // 0F
821 ESC_0F38, // 0F 38
822 ESC_0F3A // 0F 3A
824 Escape;
827 /*------------------------------------------------------------*/
828 /*--- For dealing with integer registers ---*/
829 /*------------------------------------------------------------*/
831 /* This is somewhat complex. The rules are:
833 For 64, 32 and 16 bit register references, the e or g fields in the
834 modrm bytes supply the low 3 bits of the register number. The
835 fourth (most-significant) bit of the register number is supplied by
836 the REX byte, if it is present; else that bit is taken to be zero.
838 The REX.R bit supplies the high bit corresponding to the g register
839 field, and the REX.B bit supplies the high bit corresponding to the
840 e register field (when the mod part of modrm indicates that modrm's
841 e component refers to a register and not to memory).
843 The REX.X bit supplies a high register bit for certain registers
844 in SIB address modes, and is generally rarely used.
846 For 8 bit register references, the presence of the REX byte itself
847 has significance. If there is no REX present, then the 3-bit
848 number extracted from the modrm e or g field is treated as an index
849 into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
850 old x86 encoding scheme.
852 But if there is a REX present, the register reference is
853 interpreted in the same way as for 64/32/16-bit references: a high
854 bit is extracted from REX, giving a 4-bit number, and the denoted
855 register is the lowest 8 bits of the 16 integer registers denoted
856 by the number. In particular, values 3 through 7 of this sequence
857 do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
858 %rsp %rbp %rsi %rdi.
860 The REX.W bit has no bearing at all on register numbers. Instead
861 its presence indicates that the operand size is to be overridden
862 from its default value (32 bits) to 64 bits instead. This is in
863 the same fashion that an 0x66 prefix indicates the operand size is
864 to be overridden from 32 bits down to 16 bits. When both REX.W and
865 0x66 are present there is a conflict, and REX.W takes precedence.
867 Rather than try to handle this complexity using a single huge
868 function, several smaller ones are provided. The aim is to make it
869 as difficult as possible to screw up register decoding in a subtle
870 and hard-to-track-down way.
872 Because these routines fish around in the host's memory (that is,
873 in the guest state area) for sub-parts of guest registers, their
874 correctness depends on the host's endianness. So far these
875 routines only work for little-endian hosts. Those for which
876 endianness is important have assertions to ensure sanity.
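/* Worked example of the 8-bit irregularity: a reg-field value of 4
   names %ah when no REX byte is present (the "irregular" case) but
   %spl when any REX byte is present; if the relevant REX extension
   bit is also set, the resulting 4-bit number 12 names %r12b.  The
   64/32/16-bit cases have no such wrinkle. */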
880 /* About the simplest question you can ask: where do the 64-bit
881 integer registers live (in the guest state) ? */
883 static Int integerGuestReg64Offset ( UInt reg )
885 switch (reg) {
886 case R_RAX: return OFFB_RAX;
887 case R_RCX: return OFFB_RCX;
888 case R_RDX: return OFFB_RDX;
889 case R_RBX: return OFFB_RBX;
890 case R_RSP: return OFFB_RSP;
891 case R_RBP: return OFFB_RBP;
892 case R_RSI: return OFFB_RSI;
893 case R_RDI: return OFFB_RDI;
894 case R_R8: return OFFB_R8;
895 case R_R9: return OFFB_R9;
896 case R_R10: return OFFB_R10;
897 case R_R11: return OFFB_R11;
898 case R_R12: return OFFB_R12;
899 case R_R13: return OFFB_R13;
900 case R_R14: return OFFB_R14;
901 case R_R15: return OFFB_R15;
902 default: vpanic("integerGuestReg64Offset(amd64)");
907 /* Produce the name of an integer register, for printing purposes.
908 reg is a number in the range 0 .. 15 that has been generated from a
909 3-bit reg-field number and a REX extension bit. irregular denotes
910 the case where sz==1 and no REX byte is present. */
912 static
913 const HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
915 static const HChar* ireg64_names[16]
916 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
917 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
918 static const HChar* ireg32_names[16]
919 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
920 "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
921 static const HChar* ireg16_names[16]
922 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di",
923 "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
924 static const HChar* ireg8_names[16]
925 = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil",
926 "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
927 static const HChar* ireg8_irregular[8]
928 = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" };
930 vassert(reg < 16);
931 if (sz == 1) {
932 if (irregular)
933 vassert(reg < 8);
934 } else {
935 vassert(irregular == False);
938 switch (sz) {
939 case 8: return ireg64_names[reg];
940 case 4: return ireg32_names[reg];
941 case 2: return ireg16_names[reg];
942 case 1: if (irregular) {
943 return ireg8_irregular[reg];
944 } else {
945 return ireg8_names[reg];
947 default: vpanic("nameIReg(amd64)");
951 /* Using the same argument conventions as nameIReg, produce the
952 guest state offset of an integer register. */
954 static
955 Int offsetIReg ( Int sz, UInt reg, Bool irregular )
957 vassert(reg < 16);
958 if (sz == 1) {
959 if (irregular)
960 vassert(reg < 8);
961 } else {
962 vassert(irregular == False);
965 /* Deal with irregular case -- sz==1 and no REX present */
966 if (sz == 1 && irregular) {
967 switch (reg) {
968 case R_RSP: return 1+ OFFB_RAX;
969 case R_RBP: return 1+ OFFB_RCX;
970 case R_RSI: return 1+ OFFB_RDX;
971 case R_RDI: return 1+ OFFB_RBX;
972 default: break; /* use the normal case */
976 /* Normal case */
977 return integerGuestReg64Offset(reg);
981 /* Read the %CL register :: Ity_I8, for shift/rotate operations. */
983 static IRExpr* getIRegCL ( void )
985 vassert(host_endness == VexEndnessLE);
986 return IRExpr_Get( OFFB_RCX, Ity_I8 );
990 /* Write to the %AH register. */
992 static void putIRegAH ( IRExpr* e )
994 vassert(host_endness == VexEndnessLE);
995 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
996 stmt( IRStmt_Put( OFFB_RAX+1, e ) );
1000 /* Read/write various widths of %RAX, as it has various
1001 special-purpose uses. */
1003 static const HChar* nameIRegRAX ( Int sz )
1005 switch (sz) {
1006 case 1: return "%al";
1007 case 2: return "%ax";
1008 case 4: return "%eax";
1009 case 8: return "%rax";
1010 default: vpanic("nameIRegRAX(amd64)");
1014 static IRExpr* getIRegRAX ( Int sz )
1016 vassert(host_endness == VexEndnessLE);
1017 switch (sz) {
1018 case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 );
1019 case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 );
1020 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
1021 case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
1022 default: vpanic("getIRegRAX(amd64)");
1026 static void putIRegRAX ( Int sz, IRExpr* e )
1028 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1029 vassert(host_endness == VexEndnessLE);
1030 switch (sz) {
1031 case 8: vassert(ty == Ity_I64);
1032 stmt( IRStmt_Put( OFFB_RAX, e ));
1033 break;
1034 case 4: vassert(ty == Ity_I32);
1035 stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
1036 break;
1037 case 2: vassert(ty == Ity_I16);
1038 stmt( IRStmt_Put( OFFB_RAX, e ));
1039 break;
1040 case 1: vassert(ty == Ity_I8);
1041 stmt( IRStmt_Put( OFFB_RAX, e ));
1042 break;
1043 default: vpanic("putIRegRAX(amd64)");
1048 /* Read/write various widths of %RDX, as it has various
1049 special-purpose uses. */
1051 static const HChar* nameIRegRDX ( Int sz )
1053 switch (sz) {
1054 case 1: return "%dl";
1055 case 2: return "%dx";
1056 case 4: return "%edx";
1057 case 8: return "%rdx";
1058 default: vpanic("nameIRegRDX(amd64)");
1062 static IRExpr* getIRegRDX ( Int sz )
1064 vassert(host_endness == VexEndnessLE);
1065 switch (sz) {
1066 case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 );
1067 case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 );
1068 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
1069 case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
1070 default: vpanic("getIRegRDX(amd64)");
1074 static void putIRegRDX ( Int sz, IRExpr* e )
1076 vassert(host_endness == VexEndnessLE);
1077 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
1078 switch (sz) {
1079 case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
1080 break;
1081 case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
1082 break;
1083 case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
1084 break;
1085 case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
1086 break;
1087 default: vpanic("putIRegRDX(amd64)");
1092 /* Simplistic functions to deal with the integer registers as a
1093 straightforward bank of 16 64-bit regs. */
1095 static IRExpr* getIReg64 ( UInt regno )
1097 return IRExpr_Get( integerGuestReg64Offset(regno),
1098 Ity_I64 );
1101 static void putIReg64 ( UInt regno, IRExpr* e )
1103 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1104 stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
1107 static const HChar* nameIReg64 ( UInt regno )
1109 return nameIReg( 8, regno, False );
1113 /* Simplistic functions to deal with the lower halves of integer
1114 registers as a straightforward bank of 16 32-bit regs. */
1116 static IRExpr* getIReg32 ( UInt regno )
1118 vassert(host_endness == VexEndnessLE);
1119 return unop(Iop_64to32,
1120 IRExpr_Get( integerGuestReg64Offset(regno),
1121 Ity_I64 ));
1124 static void putIReg32 ( UInt regno, IRExpr* e )
1126 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1127 stmt( IRStmt_Put( integerGuestReg64Offset(regno),
1128 unop(Iop_32Uto64,e) ) );
1131 static const HChar* nameIReg32 ( UInt regno )
1133 return nameIReg( 4, regno, False );
1137 /* Simplistic functions to deal with the lower quarters of integer
1138 registers as a straightforward bank of 16 16-bit regs. */
1140 static IRExpr* getIReg16 ( UInt regno )
1142 vassert(host_endness == VexEndnessLE);
1143 return IRExpr_Get( integerGuestReg64Offset(regno),
1144 Ity_I16 );
1147 static void putIReg16 ( UInt regno, IRExpr* e )
1149 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
1150 stmt( IRStmt_Put( integerGuestReg64Offset(regno),
1151 unop(Iop_16Uto64,e) ) );
1154 static const HChar* nameIReg16 ( UInt regno )
1156 return nameIReg( 2, regno, False );
1160 /* Sometimes what we know is a 3-bit register number, a REX byte, and
1161 which field of the REX byte is to be used to extend to a 4-bit
1162 number. These functions cater for that situation.
1164 static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
1166 vassert(lo3bits < 8);
1167 vassert(IS_VALID_PFX(pfx));
1168 return getIReg64( lo3bits | (getRexX(pfx) << 3) );
1171 static const HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
1173 vassert(lo3bits < 8);
1174 vassert(IS_VALID_PFX(pfx));
1175 return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
1178 static const HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
1180 vassert(lo3bits < 8);
1181 vassert(IS_VALID_PFX(pfx));
1182 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1183 return nameIReg( sz, lo3bits | (getRexB(pfx) << 3),
1184 toBool(sz==1 && !haveREX(pfx)) );
1187 static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
1189 vassert(lo3bits < 8);
1190 vassert(IS_VALID_PFX(pfx));
1191 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1192 if (sz == 4) {
1193 sz = 8;
1194 return unop(Iop_64to32,
1195 IRExpr_Get(
1196 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
1197 False/*!irregular*/ ),
1198 szToITy(sz)
1201 } else {
1202 return IRExpr_Get(
1203 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
1204 toBool(sz==1 && !haveREX(pfx)) ),
1205 szToITy(sz)
1210 static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
1212 vassert(lo3bits < 8);
1213 vassert(IS_VALID_PFX(pfx));
1214 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1215 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
1216 stmt( IRStmt_Put(
1217 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
1218 toBool(sz==1 && !haveREX(pfx)) ),
1219 sz==4 ? unop(Iop_32Uto64,e) : e
1224 /* Functions for getting register numbers from modrm bytes and REX
1225 when we don't have to consider the complexities of integer subreg
1226 accesses.
1228 /* Extract the g reg field from a modRM byte, and augment it using the
1229 REX.R bit from the supplied REX byte. The R bit usually is
1230 associated with the g register field.
1232 static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
1234 Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
1235 reg += (pfx & PFX_REXR) ? 8 : 0;
1236 return reg;
1239 /* Extract the e reg field from a modRM byte, and augment it using the
1240 REX.B bit from the supplied REX byte. The B bit usually is
1241 associated with the e register field (when modrm indicates e is a
1242 register, that is).
1244 static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
1246 Int rm;
1247 vassert(epartIsReg(mod_reg_rm));
1248 rm = (Int)(mod_reg_rm & 0x7);
1249 rm += (pfx & PFX_REXB) ? 8 : 0;
1250 return rm;
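/* Worked example: for modRM byte 0xD9 (reg field 3, rm field 1) with
   a prefix in which REX.R is set and REX.B is clear, gregOfRexRM
   returns 11 (%r11 for a 64-bit access) and eregOfRexRM returns 1
   (%rcx). */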
1254 /* General functions for dealing with integer register access. */
1256 /* Produce the guest state offset for a reference to the 'g' register
1257 field in a modrm byte, taking into account REX (or its absence),
1258 and the size of the access.
1260 static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1262 UInt reg;
1263 vassert(host_endness == VexEndnessLE);
1264 vassert(IS_VALID_PFX(pfx));
1265 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1266 reg = gregOfRexRM( pfx, mod_reg_rm );
1267 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
1270 static
1271 IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1273 if (sz == 4) {
1274 sz = 8;
1275 return unop(Iop_64to32,
1276 IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
1277 szToITy(sz) ));
1278 } else {
1279 return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
1280 szToITy(sz) );
1284 static
1285 void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
1287 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1288 if (sz == 4) {
1289 e = unop(Iop_32Uto64,e);
1291 stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
1294 static
1295 const HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1297 return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm),
1298 toBool(sz==1 && !haveREX(pfx)) );
1302 static
1303 IRExpr* getIRegV ( Int sz, Prefix pfx )
1305 if (sz == 4) {
1306 sz = 8;
1307 return unop(Iop_64to32,
1308 IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
1309 szToITy(sz) ));
1310 } else {
1311 return IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
1312 szToITy(sz) );
1316 static
1317 void putIRegV ( Int sz, Prefix pfx, IRExpr* e )
1319 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1320 if (sz == 4) {
1321 e = unop(Iop_32Uto64,e);
1323 stmt( IRStmt_Put( offsetIReg( sz, getVexNvvvv(pfx), False ), e ) );
1326 static
1327 const HChar* nameIRegV ( Int sz, Prefix pfx )
1329 return nameIReg( sz, getVexNvvvv(pfx), False );
1334 /* Produce the guest state offset for a reference to the 'e' register
1335 field in a modrm byte, taking into account REX (or its absence),
1336 and the size of the access. eregOfRexRM will assert if mod_reg_rm
1337 denotes a memory access rather than a register access.
1339 static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1341 UInt reg;
1342 vassert(host_endness == VexEndnessLE);
1343 vassert(IS_VALID_PFX(pfx));
1344 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1345 reg = eregOfRexRM( pfx, mod_reg_rm );
1346 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
1349 static
1350 IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1352 if (sz == 4) {
1353 sz = 8;
1354 return unop(Iop_64to32,
1355 IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
1356 szToITy(sz) ));
1357 } else {
1358 return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
1359 szToITy(sz) );
1363 static
1364 void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
1366 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1367 if (sz == 4) {
1368 e = unop(Iop_32Uto64,e);
1370 stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
1373 static
1374 const HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1376 return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm),
1377 toBool(sz==1 && !haveREX(pfx)) );
1381 /*------------------------------------------------------------*/
1382 /*--- For dealing with XMM registers ---*/
1383 /*------------------------------------------------------------*/
1385 static Int ymmGuestRegOffset ( UInt ymmreg )
1387 switch (ymmreg) {
1388 case 0: return OFFB_YMM0;
1389 case 1: return OFFB_YMM1;
1390 case 2: return OFFB_YMM2;
1391 case 3: return OFFB_YMM3;
1392 case 4: return OFFB_YMM4;
1393 case 5: return OFFB_YMM5;
1394 case 6: return OFFB_YMM6;
1395 case 7: return OFFB_YMM7;
1396 case 8: return OFFB_YMM8;
1397 case 9: return OFFB_YMM9;
1398 case 10: return OFFB_YMM10;
1399 case 11: return OFFB_YMM11;
1400 case 12: return OFFB_YMM12;
1401 case 13: return OFFB_YMM13;
1402 case 14: return OFFB_YMM14;
1403 case 15: return OFFB_YMM15;
1404 default: vpanic("ymmGuestRegOffset(amd64)");
1408 static Int xmmGuestRegOffset ( UInt xmmreg )
1410 /* Correct for little-endian host only. */
1411 vassert(host_endness == VexEndnessLE);
1412 return ymmGuestRegOffset( xmmreg );
1415 /* Lanes of vector registers are always numbered from zero being the
1416 least significant lane (rightmost in the register). */
1418 static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
1420 /* Correct for little-endian host only. */
1421 vassert(host_endness == VexEndnessLE);
1422 vassert(laneno >= 0 && laneno < 8);
1423 return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
1426 static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
1428 /* Correct for little-endian host only. */
1429 vassert(host_endness == VexEndnessLE);
1430 vassert(laneno >= 0 && laneno < 4);
1431 return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
1434 static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
1436 /* Correct for little-endian host only. */
1437 vassert(host_endness == VexEndnessLE);
1438 vassert(laneno >= 0 && laneno < 2);
1439 return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
1442 static Int ymmGuestRegLane128offset ( UInt ymmreg, Int laneno )
1444 /* Correct for little-endian host only. */
1445 vassert(host_endness == VexEndnessLE);
1446 vassert(laneno >= 0 && laneno < 2);
1447 return ymmGuestRegOffset( ymmreg ) + 16 * laneno;
1450 static Int ymmGuestRegLane64offset ( UInt ymmreg, Int laneno )
1452 /* Correct for little-endian host only. */
1453 vassert(host_endness == VexEndnessLE);
1454 vassert(laneno >= 0 && laneno < 4);
1455 return ymmGuestRegOffset( ymmreg ) + 8 * laneno;
1458 static Int ymmGuestRegLane32offset ( UInt ymmreg, Int laneno )
1460 /* Correct for little-endian host only. */
1461 vassert(host_endness == VexEndnessLE);
1462 vassert(laneno >= 0 && laneno < 8);
1463 return ymmGuestRegOffset( ymmreg ) + 4 * laneno;
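/* Worked example: on a little-endian host,
   xmmGuestRegLane32offset(2, 3) == OFFB_YMM2 + 12, the
   most-significant 32-bit lane of %xmm2, while
   ymmGuestRegLane128offset(2, 1) == OFFB_YMM2 + 16, the upper
   128-bit half of %ymm2. */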
1466 static IRExpr* getXMMReg ( UInt xmmreg )
1468 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
1471 static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
1473 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
1476 static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
1478 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
1481 static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
1483 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
1486 static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
1488 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
1491 static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
1493 return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
1496 static void putXMMReg ( UInt xmmreg, IRExpr* e )
1498 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
1499 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
1502 static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
1504 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1505 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1508 static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
1510 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
1511 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1514 static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
1516 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
1517 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1520 static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
1522 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1523 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1526 static IRExpr* getYMMReg ( UInt xmmreg )
1528 return IRExpr_Get( ymmGuestRegOffset(xmmreg), Ity_V256 );
1531 static IRExpr* getYMMRegLane128 ( UInt ymmreg, Int laneno )
1533 return IRExpr_Get( ymmGuestRegLane128offset(ymmreg,laneno), Ity_V128 );
1536 static IRExpr* getYMMRegLane64F ( UInt ymmreg, Int laneno )
1538 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_F64 );
1541 static IRExpr* getYMMRegLane64 ( UInt ymmreg, Int laneno )
1543 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_I64 );
1546 static IRExpr* getYMMRegLane32F ( UInt ymmreg, Int laneno )
1548 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_F32 );
1551 static IRExpr* getYMMRegLane32 ( UInt ymmreg, Int laneno )
1553 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_I32 );
1556 static void putYMMReg ( UInt ymmreg, IRExpr* e )
1558 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V256);
1559 stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg), e ) );
1562 static void putYMMRegLane128 ( UInt ymmreg, Int laneno, IRExpr* e )
1564 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
1565 stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg,laneno), e ) );
1568 static void putYMMRegLane64F ( UInt ymmreg, Int laneno, IRExpr* e )
1570 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
1571 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
1574 static void putYMMRegLane64 ( UInt ymmreg, Int laneno, IRExpr* e )
1576 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1577 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
1580 static void putYMMRegLane32F ( UInt ymmreg, Int laneno, IRExpr* e )
1582 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
1583 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
1586 static void putYMMRegLane32 ( UInt ymmreg, Int laneno, IRExpr* e )
1588 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1589 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
1592 static IRExpr* mkV128 ( UShort mask )
1594 return IRExpr_Const(IRConst_V128(mask));
1597 /* Write the low half of a YMM reg and zero out the upper half. */
1598 static void putYMMRegLoAndZU ( UInt ymmreg, IRExpr* e )
1600 putYMMRegLane128( ymmreg, 0, e );
1601 putYMMRegLane128( ymmreg, 1, mkV128(0) );
1604 static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
1606 vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
1607 vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
1608 return unop(Iop_64to1,
1609 binop(Iop_And64,
1610 unop(Iop_1Uto64,x),
1611 unop(Iop_1Uto64,y)));
1614 /* Generate a compare-and-swap operation, operating on memory at
1615 'addr'. The expected value is 'expVal' and the new value is
1616 'newVal'. If the operation fails, then transfer control (with a
1617 no-redir jump (XXX no -- see comment at top of this file)) to
1618 'restart_point', which is presumably the address of the guest
1619 instruction again -- retrying, essentially. */
1620 static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
1621 Addr64 restart_point )
1623 IRCAS* cas;
1624 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
1625 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
1626 IRTemp oldTmp = newTemp(tyE);
1627 IRTemp expTmp = newTemp(tyE);
1628 vassert(tyE == tyN);
1629 vassert(tyE == Ity_I64 || tyE == Ity_I32
1630 || tyE == Ity_I16 || tyE == Ity_I8);
1631 assign(expTmp, expVal);
1632 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
1633 NULL, mkexpr(expTmp), NULL, newVal );
1634 stmt( IRStmt_CAS(cas) );
1635 stmt( IRStmt_Exit(
1636 binop( mkSizedOp(tyE,Iop_CasCmpNE8),
1637 mkexpr(oldTmp), mkexpr(expTmp) ),
1638 Ijk_Boring, /*Ijk_NoRedir*/
1639 IRConst_U64( restart_point ),
1640 OFFB_RIP
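/* A minimal sketch of how a LOCK-prefixed increment of a 64-bit
   memory operand can be expressed with casLE, assuming 'addr' is an
   Ity_I64 expression holding the operand's address.  The IRCAS itself
   performs the store when the comparison succeeds; on failure the
   side exit restarts the guest instruction. */
#if 0
{
   IRTemp oldv = newTemp(Ity_I64);
   IRTemp newv = newTemp(Ity_I64);
   assign( oldv, loadLE(Ity_I64, addr) );
   assign( newv, binop(Iop_Add64, mkexpr(oldv), mkU64(1)) );
   casLE( addr, mkexpr(oldv)/*expVal*/, mkexpr(newv)/*newVal*/,
          guest_RIP_curr_instr );
}
#endif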
1645 /*------------------------------------------------------------*/
1646 /*--- Helpers for %rflags. ---*/
1647 /*------------------------------------------------------------*/
1649 /* -------------- Evaluating the flags-thunk. -------------- */
1651 /* Build IR to calculate all the eflags from stored
1652 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1653 Ity_I64. */
1654 static IRExpr* mk_amd64g_calculate_rflags_all ( void )
1656 IRExpr** args
1657 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1658 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1659 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1660 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1661 IRExpr* call
1662 = mkIRExprCCall(
1663 Ity_I64,
1664 0/*regparm*/,
1665 "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
1666 args
1668 /* Exclude OP and NDEP from definedness checking. We're only
1669 interested in DEP1 and DEP2. */
1670 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1671 return call;
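/* Bit i of mcx_mask set means Memcheck does not definedness-check
   argument i of the call.  The args here are (OP, DEP1, DEP2, NDEP),
   so (1<<0) | (1<<3) skips OP and NDEP while leaving DEP1 and DEP2
   checked. */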
1674 /* Build IR to calculate some particular condition from stored
1675 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1676 Ity_I1. */
1677 static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
1679 IRExpr** args
1680 = mkIRExprVec_5( mkU64(cond),
1681 IRExpr_Get(OFFB_CC_OP, Ity_I64),
1682 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1683 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1684 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1685 IRExpr* call
1686 = mkIRExprCCall(
1687 Ity_I64,
1688 0/*regparm*/,
1689 "amd64g_calculate_condition", &amd64g_calculate_condition,
1690 args
1692 /* Exclude the requested condition, OP and NDEP from definedness
1693 checking. We're only interested in DEP1 and DEP2. */
1694 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
1695 return unop(Iop_64to1, call);
1698 /* Build IR to calculate just the carry flag from stored
1699 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I64. */
1700 static IRExpr* mk_amd64g_calculate_rflags_c ( void )
1702 IRExpr** args
1703 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1704 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1705 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1706 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1707 IRExpr* call
1708 = mkIRExprCCall(
1709 Ity_I64,
1710 0/*regparm*/,
1711 "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
1712 args
1714 /* Exclude OP and NDEP from definedness checking. We're only
1715 interested in DEP1 and DEP2. */
1716 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1717 return call;
1721 /* -------------- Building the flags-thunk. -------------- */
1723 /* The machinery in this section builds the flag-thunk following a
1724 flag-setting operation. Hence the various setFlags_* functions.
1727 static Bool isAddSub ( IROp op8 )
1729 return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
1732 static Bool isLogic ( IROp op8 )
1734 return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
1737 /* U-widen 1/8/16/32/64 bit int expr to 64. */
1738 static IRExpr* widenUto64 ( IRExpr* e )
1740 switch (typeOfIRExpr(irsb->tyenv,e)) {
1741 case Ity_I64: return e;
1742 case Ity_I32: return unop(Iop_32Uto64, e);
1743 case Ity_I16: return unop(Iop_16Uto64, e);
1744 case Ity_I8: return unop(Iop_8Uto64, e);
1745 case Ity_I1: return unop(Iop_1Uto64, e);
1746 default: vpanic("widenUto64");
1750 /* S-widen 8/16/32/64 bit int expr to 64. */
1751 static IRExpr* widenSto64 ( IRExpr* e )
1753 switch (typeOfIRExpr(irsb->tyenv,e)) {
1754 case Ity_I64: return e;
1755 case Ity_I32: return unop(Iop_32Sto64, e);
1756 case Ity_I16: return unop(Iop_16Sto64, e);
1757 case Ity_I8: return unop(Iop_8Sto64, e);
1758 default: vpanic("widenSto64");
1762 /* Narrow 8/16/32/64 bit int expr to 8/16/32/64. Clearly only some
1763 of these combinations make sense. */
1764 static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
1766 IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
1767 if (src_ty == dst_ty)
1768 return e;
1769 if (src_ty == Ity_I32 && dst_ty == Ity_I16)
1770 return unop(Iop_32to16, e);
1771 if (src_ty == Ity_I32 && dst_ty == Ity_I8)
1772 return unop(Iop_32to8, e);
1773 if (src_ty == Ity_I64 && dst_ty == Ity_I32)
1774 return unop(Iop_64to32, e);
1775 if (src_ty == Ity_I64 && dst_ty == Ity_I16)
1776 return unop(Iop_64to16, e);
1777 if (src_ty == Ity_I64 && dst_ty == Ity_I8)
1778 return unop(Iop_64to8, e);
1780 vex_printf("\nsrc, dst tys are: ");
1781 ppIRType(src_ty);
1782 vex_printf(", ");
1783 ppIRType(dst_ty);
1784 vex_printf("\n");
1785 vpanic("narrowTo(amd64)");
1789 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
1790 auto-sized up to the real op. */
1792 static
1793 void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
1795 Int ccOp = 0;
1796 switch (ty) {
1797 case Ity_I8: ccOp = 0; break;
1798 case Ity_I16: ccOp = 1; break;
1799 case Ity_I32: ccOp = 2; break;
1800 case Ity_I64: ccOp = 3; break;
1801 default: vassert(0);
1803 switch (op8) {
1804 case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
1805 case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
1806 default: ppIROp(op8);
1807 vpanic("setFlags_DEP1_DEP2(amd64)");
1809 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1810 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1811 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
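/* A minimal sketch of use, assuming 'argL' and 'argR' are Ity_I32
   temporaries holding the operands of a 32-bit add whose IR has
   already been emitted.  (AMD64G_CC_OP_ADDL is assumed here to be
   AMD64G_CC_OP_ADDB + 2, i.e. the B/W/L/Q encodings are adjacent.) */
#if 0
setFlags_DEP1_DEP2( Iop_Add8, argL, argR, Ity_I32 );
/* leaves CC_OP == AMD64G_CC_OP_ADDL and DEP1/DEP2 holding the
   zero-widened operands, from which %rflags can later be recomputed
   on demand by the helpers above. */
#endif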
1815 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
1817 static
1818 void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
1820 Int ccOp = 0;
1821 switch (ty) {
1822 case Ity_I8: ccOp = 0; break;
1823 case Ity_I16: ccOp = 1; break;
1824 case Ity_I32: ccOp = 2; break;
1825 case Ity_I64: ccOp = 3; break;
1826 default: vassert(0);
1828 switch (op8) {
1829 case Iop_Or8:
1830 case Iop_And8:
1831 case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
1832 default: ppIROp(op8);
1833 vpanic("setFlags_DEP1(amd64)");
1835 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1836 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1837 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
1841 /* For shift operations, we put in the result and the undershifted
1842 result. Except if the shift amount is zero, the thunk is left
1843 unchanged. */
1845 static void setFlags_DEP1_DEP2_shift ( IROp op64,
1846 IRTemp res,
1847 IRTemp resUS,
1848 IRType ty,
1849 IRTemp guard )
1851 Int ccOp = 0;
1852 switch (ty) {
1853 case Ity_I8: ccOp = 0; break;
1854 case Ity_I16: ccOp = 1; break;
1855 case Ity_I32: ccOp = 2; break;
1856 case Ity_I64: ccOp = 3; break;
1857 default: vassert(0);
1860 vassert(guard);
1862 /* Both kinds of right shifts are handled by the same thunk
1863 operation. */
1864 switch (op64) {
1865 case Iop_Shr64:
1866 case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
1867 case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
1868 default: ppIROp(op64);
1869 vpanic("setFlags_DEP1_DEP2_shift(amd64)");
1872 /* guard :: Ity_I8. We need to convert it to I1. */
1873 IRTemp guardB = newTemp(Ity_I1);
1874 assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) );
1876 /* DEP1 contains the result, DEP2 contains the undershifted value. */
1877 stmt( IRStmt_Put( OFFB_CC_OP,
1878 IRExpr_ITE( mkexpr(guardB),
1879 mkU64(ccOp),
1880 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
1881 stmt( IRStmt_Put( OFFB_CC_DEP1,
1882 IRExpr_ITE( mkexpr(guardB),
1883 widenUto64(mkexpr(res)),
1884 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
1885 stmt( IRStmt_Put( OFFB_CC_DEP2,
1886 IRExpr_ITE( mkexpr(guardB),
1887 widenUto64(mkexpr(resUS)),
1888 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
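   /* Illustrative example: for a 16-bit SHL, op64 is Iop_Shl64 and ty is
      Ity_I16, so ccOp becomes AMD64G_CC_OP_SHLB + 1.  If the masked
      shift amount turns out to be zero at run time, guardB is 0 and the
      three ITEs above leave CC_OP/CC_DEP1/CC_DEP2 untouched, which is
      exactly the "thunk is left unchanged" behaviour required above. */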
1892 /* For the inc/dec case, we store in DEP1 the result value and in NDEP
1893 the former value of the carry flag, which unfortunately we have to
1894 compute. */
1896 static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
1898 Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;
1900 switch (ty) {
1901 case Ity_I8: ccOp += 0; break;
1902 case Ity_I16: ccOp += 1; break;
1903 case Ity_I32: ccOp += 2; break;
1904 case Ity_I64: ccOp += 3; break;
1905 default: vassert(0);
1908 /* This has to come first, because calculating the C flag
1909 may require reading all four thunk fields. */
1910 stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
1911 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1912 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
1913 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
1917 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
1918 two arguments. */
1920 static
1921 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
1923 switch (ty) {
1924 case Ity_I8:
1925 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) );
1926 break;
1927 case Ity_I16:
1928 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) );
1929 break;
1930 case Ity_I32:
1931 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) );
1932 break;
1933 case Ity_I64:
1934 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) );
1935 break;
1936 default:
1937 vpanic("setFlags_MUL(amd64)");
1939 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
1940 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
1944 /* -------------- Condition codes. -------------- */
1946 /* Condition codes, using the AMD encoding. */
1948 static const HChar* name_AMD64Condcode ( AMD64Condcode cond )
1950 switch (cond) {
1951 case AMD64CondO: return "o";
1952 case AMD64CondNO: return "no";
1953 case AMD64CondB: return "b";
1954 case AMD64CondNB: return "ae"; /*"nb";*/
1955 case AMD64CondZ: return "e"; /*"z";*/
1956 case AMD64CondNZ: return "ne"; /*"nz";*/
1957 case AMD64CondBE: return "be";
1958 case AMD64CondNBE: return "a"; /*"nbe";*/
1959 case AMD64CondS: return "s";
1960 case AMD64CondNS: return "ns";
1961 case AMD64CondP: return "p";
1962 case AMD64CondNP: return "np";
1963 case AMD64CondL: return "l";
1964 case AMD64CondNL: return "ge"; /*"nl";*/
1965 case AMD64CondLE: return "le";
1966 case AMD64CondNLE: return "g"; /*"nle";*/
1967 case AMD64CondAlways: return "ALWAYS";
1968 default: vpanic("name_AMD64Condcode");
1972 static
1973 AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode cond,
1974 /*OUT*/Bool* needInvert )
1976 vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
1977 if (cond & 1) {
1978 *needInvert = True;
1979 return cond-1;
1980 } else {
1981 *needInvert = False;
1982 return cond;
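/* Illustrative example: in the AMD encoding each condition is paired
   with its negation, the negated form having the odd value.  So for
   AMD64CondNZ (odd) this returns AMD64CondZ with *needInvert set to
   True, whereas AMD64CondZ itself comes back unchanged with
   *needInvert set to False. */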
1987 /* -------------- Helpers for ADD/SUB with carry. -------------- */
1989 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
1990 appropriately.
1992 Optionally, generate a store for the 'tres' value. This can either
1993 be a normal store, or it can be a cas-with-possible-failure style
1994 store:
1996 if taddr is IRTemp_INVALID, then no store is generated.
1998 if taddr is not IRTemp_INVALID, then a store (using taddr as
1999 the address) is generated:
2001 if texpVal is IRTemp_INVALID then a normal store is
2002 generated, and restart_point must be zero (it is irrelevant).
2004 if texpVal is not IRTemp_INVALID then a cas-style store is
2005 generated. texpVal is the expected value, restart_point
2006 is the restart point if the store fails, and texpVal must
2007 have the same type as tres.
2010 static void helper_ADC ( Int sz,
2011 IRTemp tres, IRTemp ta1, IRTemp ta2,
2012 /* info about optional store: */
2013 IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
2015 UInt thunkOp;
2016 IRType ty = szToITy(sz);
2017 IRTemp oldc = newTemp(Ity_I64);
2018 IRTemp oldcn = newTemp(ty);
2019 IROp plus = mkSizedOp(ty, Iop_Add8);
2020 IROp xor = mkSizedOp(ty, Iop_Xor8);
2022 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2024 switch (sz) {
2025 case 8: thunkOp = AMD64G_CC_OP_ADCQ; break;
2026 case 4: thunkOp = AMD64G_CC_OP_ADCL; break;
2027 case 2: thunkOp = AMD64G_CC_OP_ADCW; break;
2028 case 1: thunkOp = AMD64G_CC_OP_ADCB; break;
2029 default: vassert(0);
2032 /* oldc = old carry flag, 0 or 1 */
2033 assign( oldc, binop(Iop_And64,
2034 mk_amd64g_calculate_rflags_c(),
2035 mkU64(1)) );
2037 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
2039 assign( tres, binop(plus,
2040 binop(plus,mkexpr(ta1),mkexpr(ta2)),
2041 mkexpr(oldcn)) );
2043 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2044 start of this function. */
2045 if (taddr != IRTemp_INVALID) {
2046 if (texpVal == IRTemp_INVALID) {
2047 vassert(restart_point == 0);
2048 storeLE( mkexpr(taddr), mkexpr(tres) );
2049 } else {
2050 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
2051 /* .. and hence 'texpVal' has the same type as 'tres'. */
2052 casLE( mkexpr(taddr),
2053 mkexpr(texpVal), mkexpr(tres), restart_point );
2057 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
2058 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
2059 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2060 mkexpr(oldcn)) )) );
2061 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
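   /* Illustrative example: a register-only "adc %ebx,%eax" would be
      handled as
         helper_ADC( 4, tres, tEAX, tEBX,
                     IRTemp_INVALID, IRTemp_INVALID, 0 );
      (tEAX/tEBX being temps holding the operand values), so that no
      store is generated.  Note that CC_DEP2 is written as ta2 ^ oldcn
      rather than plain ta2; the flags helper presumably undoes the xor
      using CC_NDEP (which holds oldc) when it needs the original second
      argument back. */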
2065 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
2066 appropriately. As with helper_ADC, possibly generate a store of
2067 the result -- see comments on helper_ADC for details.
2069 static void helper_SBB ( Int sz,
2070 IRTemp tres, IRTemp ta1, IRTemp ta2,
2071 /* info about optional store: */
2072 IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
2074 UInt thunkOp;
2075 IRType ty = szToITy(sz);
2076 IRTemp oldc = newTemp(Ity_I64);
2077 IRTemp oldcn = newTemp(ty);
2078 IROp minus = mkSizedOp(ty, Iop_Sub8);
2079 IROp xor = mkSizedOp(ty, Iop_Xor8);
2081 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2083 switch (sz) {
2084 case 8: thunkOp = AMD64G_CC_OP_SBBQ; break;
2085 case 4: thunkOp = AMD64G_CC_OP_SBBL; break;
2086 case 2: thunkOp = AMD64G_CC_OP_SBBW; break;
2087 case 1: thunkOp = AMD64G_CC_OP_SBBB; break;
2088 default: vassert(0);
2091 /* oldc = old carry flag, 0 or 1 */
2092 assign( oldc, binop(Iop_And64,
2093 mk_amd64g_calculate_rflags_c(),
2094 mkU64(1)) );
2096 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
2098 assign( tres, binop(minus,
2099 binop(minus,mkexpr(ta1),mkexpr(ta2)),
2100 mkexpr(oldcn)) );
2102 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2103 start of this function. */
2104 if (taddr != IRTemp_INVALID) {
2105 if (texpVal == IRTemp_INVALID) {
2106 vassert(restart_point == 0);
2107 storeLE( mkexpr(taddr), mkexpr(tres) );
2108 } else {
2109 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
2110 /* .. and hence 'texpVal' has the same type as 'tres'. */
2111 casLE( mkexpr(taddr),
2112 mkexpr(texpVal), mkexpr(tres), restart_point );
2116 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
2117 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
2118 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2119 mkexpr(oldcn)) )) );
2120 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
2124 /* Given ta1, ta2 and tres, compute tres = ADCX(ta1,ta2) or tres = ADOX(ta1,ta2)
2125 and set flags appropriately.
2127 static void helper_ADCX_ADOX ( Bool isADCX, Int sz,
2128 IRTemp tres, IRTemp ta1, IRTemp ta2 )
2130 UInt thunkOp;
2131 IRType ty = szToITy(sz);
2132 IRTemp oldflags = newTemp(Ity_I64);
2133 IRTemp oldOC = newTemp(Ity_I64); // old O or C flag
2134 IRTemp oldOCn = newTemp(ty); // old O or C flag, narrowed
2135 IROp plus = mkSizedOp(ty, Iop_Add8);
2136 IROp xor = mkSizedOp(ty, Iop_Xor8);
2138 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2140 switch (sz) {
2141 case 8: thunkOp = isADCX ? AMD64G_CC_OP_ADCX64
2142 : AMD64G_CC_OP_ADOX64; break;
2143 case 4: thunkOp = isADCX ? AMD64G_CC_OP_ADCX32
2144 : AMD64G_CC_OP_ADOX32; break;
2145 default: vassert(0);
2148 assign( oldflags, mk_amd64g_calculate_rflags_all() );
2150 /* oldOC = old overflow/carry flag, 0 or 1 */
2151 assign( oldOC, binop(Iop_And64,
2152 binop(Iop_Shr64,
2153 mkexpr(oldflags),
2154 mkU8(isADCX ? AMD64G_CC_SHIFT_C
2155 : AMD64G_CC_SHIFT_O)),
2156 mkU64(1)) );
2158 assign( oldOCn, narrowTo(ty, mkexpr(oldOC)) );
2160 assign( tres, binop(plus,
2161 binop(plus,mkexpr(ta1),mkexpr(ta2)),
2162 mkexpr(oldOCn)) );
2164 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
2165 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
2166 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2167 mkexpr(oldOCn)) )) );
2168 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldflags) ) );
2172 /* -------------- Helpers for disassembly printing. -------------- */
2174 static const HChar* nameGrp1 ( Int opc_aux )
2176 static const HChar* grp1_names[8]
2177 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
2178 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
2179 return grp1_names[opc_aux];
2182 static const HChar* nameGrp2 ( Int opc_aux )
2184 static const HChar* grp2_names[8]
2185 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
2186 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
2187 return grp2_names[opc_aux];
2190 static const HChar* nameGrp4 ( Int opc_aux )
2192 static const HChar* grp4_names[8]
2193 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
2194 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
2195 return grp4_names[opc_aux];
2198 static const HChar* nameGrp5 ( Int opc_aux )
2200 static const HChar* grp5_names[8]
2201 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
2202 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
2203 return grp5_names[opc_aux];
2206 static const HChar* nameGrp8 ( Int opc_aux )
2208 static const HChar* grp8_names[8]
2209 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
2210 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
2211 return grp8_names[opc_aux];
2214 static const HChar* nameSReg ( UInt sreg )
2216 switch (sreg) {
2217 case R_ES: return "%es";
2218 case R_CS: return "%cs";
2219 case R_SS: return "%ss";
2220 case R_DS: return "%ds";
2221 case R_FS: return "%fs";
2222 case R_GS: return "%gs";
2223 default: vpanic("nameSReg(amd64)");
2227 static const HChar* nameMMXReg ( Int mmxreg )
2229 static const HChar* mmx_names[8]
2230 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
2231 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
2232 return mmx_names[mmxreg];
2235 static const HChar* nameXMMReg ( Int xmmreg )
2237 static const HChar* xmm_names[16]
2238 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
2239 "%xmm4", "%xmm5", "%xmm6", "%xmm7",
2240 "%xmm8", "%xmm9", "%xmm10", "%xmm11",
2241 "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
2242 if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
2243 return xmm_names[xmmreg];
2246 static const HChar* nameMMXGran ( Int gran )
2248 switch (gran) {
2249 case 0: return "b";
2250 case 1: return "w";
2251 case 2: return "d";
2252 case 3: return "q";
2253 default: vpanic("nameMMXGran(amd64,guest)");
2257 static HChar nameISize ( Int size )
2259 switch (size) {
2260 case 8: return 'q';
2261 case 4: return 'l';
2262 case 2: return 'w';
2263 case 1: return 'b';
2264 default: vpanic("nameISize(amd64)");
2268 static const HChar* nameYMMReg ( Int ymmreg )
2270 static const HChar* ymm_names[16]
2271 = { "%ymm0", "%ymm1", "%ymm2", "%ymm3",
2272 "%ymm4", "%ymm5", "%ymm6", "%ymm7",
2273 "%ymm8", "%ymm9", "%ymm10", "%ymm11",
2274 "%ymm12", "%ymm13", "%ymm14", "%ymm15" };
2275 if (ymmreg < 0 || ymmreg > 15) vpanic("nameYMMReg(amd64)");
2276 return ymm_names[ymmreg];
2280 /*------------------------------------------------------------*/
2281 /*--- JMP helpers ---*/
2282 /*------------------------------------------------------------*/
2284 static void jmp_lit( /*MOD*/DisResult* dres,
2285 IRJumpKind kind, Addr64 d64 )
2287 vassert(dres->whatNext == Dis_Continue);
2288 vassert(dres->len == 0);
2289 vassert(dres->continueAt == 0);
2290 vassert(dres->jk_StopHere == Ijk_INVALID);
2291 dres->whatNext = Dis_StopHere;
2292 dres->jk_StopHere = kind;
2293 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) );
2296 static void jmp_treg( /*MOD*/DisResult* dres,
2297 IRJumpKind kind, IRTemp t )
2299 vassert(dres->whatNext == Dis_Continue);
2300 vassert(dres->len == 0);
2301 vassert(dres->continueAt == 0);
2302 vassert(dres->jk_StopHere == Ijk_INVALID);
2303 dres->whatNext = Dis_StopHere;
2304 dres->jk_StopHere = kind;
2305 stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) );
2308 static
2309 void jcc_01 ( /*MOD*/DisResult* dres,
2310 AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
2312 Bool invert;
2313 AMD64Condcode condPos;
2314 vassert(dres->whatNext == Dis_Continue);
2315 vassert(dres->len == 0);
2316 vassert(dres->continueAt == 0);
2317 vassert(dres->jk_StopHere == Ijk_INVALID);
2318 dres->whatNext = Dis_StopHere;
2319 dres->jk_StopHere = Ijk_Boring;
2320 condPos = positiveIse_AMD64Condcode ( cond, &invert );
2321 if (invert) {
2322 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2323 Ijk_Boring,
2324 IRConst_U64(d64_false),
2325 OFFB_RIP ) );
2326 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) );
2327 } else {
2328 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2329 Ijk_Boring,
2330 IRConst_U64(d64_true),
2331 OFFB_RIP ) );
2332 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) );
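   /* Illustrative example: for "jnz target", cond is AMD64CondNZ, which
      is odd, so condPos becomes AMD64CondZ with invert == True.  The
      generated IR then exits to the fall-through address (d64_false)
      whenever Z holds, and otherwise sets %rip to the taken address
      (d64_true) -- the same behaviour as the non-inverted case, but
      with the exit edge testing the positive form of the condition. */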
2336 /* Let new_rsp be the %rsp value after a call/return. Let nia be the
2337 guest address of the next instruction to be executed.
2339 This function generates an AbiHint to say that -128(%rsp)
2340 .. -1(%rsp) should now be regarded as uninitialised.
2342 static
2343 void make_redzone_AbiHint ( const VexAbiInfo* vbi,
2344 IRTemp new_rsp, IRTemp nia, const HChar* who )
2346 Int szB = vbi->guest_stack_redzone_size;
2347 vassert(szB >= 0);
2349    /* A bit of a kludge.  Currently the only ABI we've guested AMD64
2350 for is ELF. So just check it's the expected 128 value
2351 (paranoia). */
2352 vassert(szB == 128);
2354 if (0) vex_printf("AbiHint: %s\n", who);
2355 vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
2356 vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
2357 if (szB > 0)
2358 stmt( IRStmt_AbiHint(
2359 binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)),
2360 szB,
2361 mkexpr(nia)
2366 /*------------------------------------------------------------*/
2367 /*--- Disassembling addressing modes ---*/
2368 /*------------------------------------------------------------*/
2370 static
2371 const HChar* segRegTxt ( Prefix pfx )
2373 if (pfx & PFX_CS) return "%cs:";
2374 if (pfx & PFX_DS) return "%ds:";
2375 if (pfx & PFX_ES) return "%es:";
2376 if (pfx & PFX_FS) return "%fs:";
2377 if (pfx & PFX_GS) return "%gs:";
2378 if (pfx & PFX_SS) return "%ss:";
2379 return ""; /* no override */
2383 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
2384 linear address by adding any required segment override as indicated
2385 by sorb, and also dealing with any address size override
2386 present. */
2387 static
2388 IRExpr* handleAddrOverrides ( const VexAbiInfo* vbi,
2389 Prefix pfx, IRExpr* virtual )
2391 /* --- address size override --- */
2392 if (haveASO(pfx))
2393 virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));
2395    /* Note that the below are hacks that rely on the assumption
2396 that %fs or %gs are constant.
2397 Typically, %fs is always 0x63 on linux (in the main thread, it
2398 stays at value 0), %gs always 0x60 on Darwin, ... */
2399 /* --- segment overrides --- */
2400 if (pfx & PFX_FS) {
2401 if (vbi->guest_amd64_assume_fs_is_const) {
2402 /* return virtual + guest_FS_CONST. */
2403 virtual = binop(Iop_Add64, virtual,
2404 IRExpr_Get(OFFB_FS_CONST, Ity_I64));
2405 } else {
2406 unimplemented("amd64 %fs segment override");
2410 if (pfx & PFX_GS) {
2411 if (vbi->guest_amd64_assume_gs_is_const) {
2412 /* return virtual + guest_GS_CONST. */
2413 virtual = binop(Iop_Add64, virtual,
2414 IRExpr_Get(OFFB_GS_CONST, Ity_I64));
2415 } else {
2416 unimplemented("amd64 %gs segment override");
2420 /* cs, ds, es and ss are simply ignored in 64-bit mode. */
2422 return virtual;
2425 //.. {
2426 //.. Int sreg;
2427 //.. IRType hWordTy;
2428 //.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
2429 //..
2430 //.. if (sorb == 0)
2431 //.. /* the common case - no override */
2432 //.. return virtual;
2433 //..
2434 //.. switch (sorb) {
2435 //.. case 0x3E: sreg = R_DS; break;
2436 //.. case 0x26: sreg = R_ES; break;
2437 //.. case 0x64: sreg = R_FS; break;
2438 //.. case 0x65: sreg = R_GS; break;
2439 //.. default: vpanic("handleAddrOverrides(x86,guest)");
2440 //.. }
2441 //..
2442 //.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
2443 //..
2444 //.. seg_selector = newTemp(Ity_I32);
2445 //.. ldt_ptr = newTemp(hWordTy);
2446 //.. gdt_ptr = newTemp(hWordTy);
2447 //.. r64 = newTemp(Ity_I64);
2448 //..
2449 //.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
2450 //.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
2451 //.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
2452 //..
2453 //.. /*
2454 //.. Call this to do the translation and limit checks:
2455 //.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2456 //.. UInt seg_selector, UInt virtual_addr )
2457 //.. */
2458 //.. assign(
2459 //.. r64,
2460 //.. mkIRExprCCall(
2461 //.. Ity_I64,
2462 //.. 0/*regparms*/,
2463 //.. "x86g_use_seg_selector",
2464 //.. &x86g_use_seg_selector,
2465 //.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
2466 //.. mkexpr(seg_selector), virtual)
2467 //.. )
2468 //.. );
2469 //..
2470 //.. /* If the high 32 of the result are non-zero, there was a
2471 //.. failure in address translation. In which case, make a
2472 //.. quick exit.
2473 //.. */
2474 //.. stmt(
2475 //.. IRStmt_Exit(
2476 //.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
2477 //.. Ijk_MapFail,
2478 //.. IRConst_U32( guest_eip_curr_instr )
2479 //.. )
2480 //.. );
2481 //..
2482 //.. /* otherwise, here's the translated result. */
2483 //.. return unop(Iop_64to32, mkexpr(r64));
2484 //.. }
2487 /* Generate IR to calculate an address indicated by a ModRM and
2488 following SIB bytes. The expression, and the number of bytes in
2489 the address mode, are returned (the latter in *len). Note that
2490 this fn should not be called if the R/M part of the address denotes
2491    a register instead of memory.  Text of
2492 the addressing mode is placed in buf.
2494 The computed address is stored in a new tempreg, and the
2495 identity of the tempreg is returned.
2497 extra_bytes holds the number of bytes after the amode, as supplied
2498 by the caller. This is needed to make sense of %rip-relative
2499 addresses. Note that the value that *len is set to is only the
2500 length of the amode itself and does not include the value supplied
2501 in extra_bytes.
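
   Illustrative example: when decoding, say, "addl $imm32, d32(%rip)",
   the immediate follows the amode, so the caller passes
   extra_bytes == 4 (dis_Grp1 below passes d_sz for exactly this
   reason); guest_RIP_next_assumed then points past the immediate too,
   which is what a %rip-relative displacement is architecturally
   relative to.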
2504 static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
2506 IRTemp tmp = newTemp(Ity_I64);
2507 assign( tmp, addr64 );
2508 return tmp;
2511 static
2512 IRTemp disAMode ( /*OUT*/Int* len,
2513 const VexAbiInfo* vbi, Prefix pfx, Long delta,
2514 /*OUT*/HChar* buf, Int extra_bytes )
2516 UChar mod_reg_rm = getUChar(delta);
2517 delta++;
2519 buf[0] = (UChar)0;
2520 vassert(extra_bytes >= 0 && extra_bytes < 10);
2522 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2523 jump table seems a bit excessive.
2525 mod_reg_rm &= 0xC7; /* is now XX000YYY */
2526 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2527 /* is now XX0XXYYY */
2528 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
2529 switch (mod_reg_rm) {
2531 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2532 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2534 case 0x00: case 0x01: case 0x02: case 0x03:
2535 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2536 { UChar rm = toUChar(mod_reg_rm & 7);
2537 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
2538 *len = 1;
2539 return disAMode_copy2tmp(
2540 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
2543 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2544 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2546 case 0x08: case 0x09: case 0x0A: case 0x0B:
2547 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2548 { UChar rm = toUChar(mod_reg_rm & 7);
2549 Long d = getSDisp8(delta);
2550 if (d == 0) {
2551 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
2552 } else {
2553 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
2555 *len = 2;
2556 return disAMode_copy2tmp(
2557 handleAddrOverrides(vbi, pfx,
2558 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
2561 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2562 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2564 case 0x10: case 0x11: case 0x12: case 0x13:
2565 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2566 { UChar rm = toUChar(mod_reg_rm & 7);
2567 Long d = getSDisp32(delta);
2568 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
2569 *len = 5;
2570 return disAMode_copy2tmp(
2571 handleAddrOverrides(vbi, pfx,
2572 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
2575 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2576    /* REX.B==1: a register, %r8 .. %r15.  This shouldn't happen. */
2577 case 0x18: case 0x19: case 0x1A: case 0x1B:
2578 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2579 vpanic("disAMode(amd64): not an addr!");
2581 /* RIP + disp32. This assumes that guest_RIP_curr_instr is set
2582 correctly at the start of handling each instruction. */
2583 case 0x05:
2584 { Long d = getSDisp32(delta);
2585 *len = 5;
2586 DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
2587 /* We need to know the next instruction's start address.
2588 Try and figure out what it is, record the guess, and ask
2589 the top-level driver logic (bbToIR_AMD64) to check we
2590 guessed right, after the instruction is completely
2591 decoded. */
2592 guest_RIP_next_mustcheck = True;
2593 guest_RIP_next_assumed = guest_RIP_bbstart
2594 + delta+4 + extra_bytes;
2595 return disAMode_copy2tmp(
2596 handleAddrOverrides(vbi, pfx,
2597 binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
2598 mkU64(d))));
2601 case 0x04: {
2602 /* SIB, with no displacement. Special cases:
2603 -- %rsp cannot act as an index value.
2604 If index_r indicates %rsp, zero is used for the index.
2605 -- when mod is zero and base indicates RBP or R13, base is
2606 instead a 32-bit sign-extended literal.
2607 It's all madness, I tell you. Extract %index, %base and
2608 scale from the SIB byte. The value denoted is then:
2609 | %index == %RSP && (%base == %RBP || %base == %R13)
2610 = d32 following SIB byte
2611 | %index == %RSP && !(%base == %RBP || %base == %R13)
2612 = %base
2613 | %index != %RSP && (%base == %RBP || %base == %R13)
2614 = d32 following SIB byte + (%index << scale)
2615 | %index != %RSP && !(%base == %RBP || %base == %R13)
2616 = %base + (%index << scale)
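
         Worked example (illustrative): with REX.X == 0 and REX.B == 0,
         a SIB byte of 0x9D decomposes as scale == 2, index_r == 3
         (%rbx) and base_r == 5 (%rbp/%r13), so by the rules above the
         denoted value is "d32 following SIB byte + (%rbx << 2)",
         i.e. d32(,%rbx,4).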
2618 UChar sib = getUChar(delta);
2619 UChar scale = toUChar((sib >> 6) & 3);
2620 UChar index_r = toUChar((sib >> 3) & 7);
2621 UChar base_r = toUChar(sib & 7);
2622 /* correct since #(R13) == 8 + #(RBP) */
2623 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2624 Bool index_is_SP = toBool(index_r == R_RSP && 0==getRexX(pfx));
2625 delta++;
2627 if ((!index_is_SP) && (!base_is_BPor13)) {
2628 if (scale == 0) {
2629 DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
2630 nameIRegRexB(8,pfx,base_r),
2631 nameIReg64rexX(pfx,index_r));
2632 } else {
2633 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
2634 nameIRegRexB(8,pfx,base_r),
2635 nameIReg64rexX(pfx,index_r), 1<<scale);
2637 *len = 2;
2638 return
2639 disAMode_copy2tmp(
2640 handleAddrOverrides(vbi, pfx,
2641 binop(Iop_Add64,
2642 getIRegRexB(8,pfx,base_r),
2643 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
2644 mkU8(scale)))));
2647 if ((!index_is_SP) && base_is_BPor13) {
2648 Long d = getSDisp32(delta);
2649 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
2650 nameIReg64rexX(pfx,index_r), 1<<scale);
2651 *len = 6;
2652 return
2653 disAMode_copy2tmp(
2654 handleAddrOverrides(vbi, pfx,
2655 binop(Iop_Add64,
2656 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
2657 mkU8(scale)),
2658 mkU64(d))));
2661 if (index_is_SP && (!base_is_BPor13)) {
2662 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
2663 *len = 2;
2664 return disAMode_copy2tmp(
2665 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r)));
2668 if (index_is_SP && base_is_BPor13) {
2669 Long d = getSDisp32(delta);
2670 DIS(buf, "%s%lld", segRegTxt(pfx), d);
2671 *len = 6;
2672 return disAMode_copy2tmp(
2673 handleAddrOverrides(vbi, pfx, mkU64(d)));
2676 vassert(0);
2679 /* SIB, with 8-bit displacement. Special cases:
2680         -- %rsp cannot act as an index value.
2681            If index_r indicates %rsp, zero is used for the index.
2682         Denoted value is:
2683            | %index == %RSP
2684            = d8 + %base
2685            | %index != %RSP
2686            = d8 + %base + (%index << scale)
2688 case 0x0C: {
2689 UChar sib = getUChar(delta);
2690 UChar scale = toUChar((sib >> 6) & 3);
2691 UChar index_r = toUChar((sib >> 3) & 7);
2692 UChar base_r = toUChar(sib & 7);
2693 Long d = getSDisp8(delta+1);
2695 if (index_r == R_RSP && 0==getRexX(pfx)) {
2696 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
2697 d, nameIRegRexB(8,pfx,base_r));
2698 *len = 3;
2699 return disAMode_copy2tmp(
2700 handleAddrOverrides(vbi, pfx,
2701 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
2702 } else {
2703 if (scale == 0) {
2704 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2705 nameIRegRexB(8,pfx,base_r),
2706 nameIReg64rexX(pfx,index_r));
2707 } else {
2708 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2709 nameIRegRexB(8,pfx,base_r),
2710 nameIReg64rexX(pfx,index_r), 1<<scale);
2712 *len = 3;
2713 return
2714 disAMode_copy2tmp(
2715 handleAddrOverrides(vbi, pfx,
2716 binop(Iop_Add64,
2717 binop(Iop_Add64,
2718 getIRegRexB(8,pfx,base_r),
2719 binop(Iop_Shl64,
2720 getIReg64rexX(pfx,index_r), mkU8(scale))),
2721 mkU64(d))));
2723 vassert(0); /*NOTREACHED*/
2726 /* SIB, with 32-bit displacement. Special cases:
2727 -- %rsp cannot act as an index value.
2728 If index_r indicates %rsp, zero is used for the index.
2729 Denoted value is:
2730 | %index == %RSP
2731 = d32 + %base
2732 | %index != %RSP
2733 = d32 + %base + (%index << scale)
2735 case 0x14: {
2736 UChar sib = getUChar(delta);
2737 UChar scale = toUChar((sib >> 6) & 3);
2738 UChar index_r = toUChar((sib >> 3) & 7);
2739 UChar base_r = toUChar(sib & 7);
2740 Long d = getSDisp32(delta+1);
2742 if (index_r == R_RSP && 0==getRexX(pfx)) {
2743 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
2744 d, nameIRegRexB(8,pfx,base_r));
2745 *len = 6;
2746 return disAMode_copy2tmp(
2747 handleAddrOverrides(vbi, pfx,
2748 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
2749 } else {
2750 if (scale == 0) {
2751 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2752 nameIRegRexB(8,pfx,base_r),
2753 nameIReg64rexX(pfx,index_r));
2754 } else {
2755 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2756 nameIRegRexB(8,pfx,base_r),
2757 nameIReg64rexX(pfx,index_r), 1<<scale);
2759 *len = 6;
2760 return
2761 disAMode_copy2tmp(
2762 handleAddrOverrides(vbi, pfx,
2763 binop(Iop_Add64,
2764 binop(Iop_Add64,
2765 getIRegRexB(8,pfx,base_r),
2766 binop(Iop_Shl64,
2767 getIReg64rexX(pfx,index_r), mkU8(scale))),
2768 mkU64(d))));
2770 vassert(0); /*NOTREACHED*/
2773 default:
2774 vpanic("disAMode(amd64)");
2775 return 0; /*notreached*/
2780 /* Similarly for VSIB addressing. This returns just the addend,
2781 and fills in *rI and *vscale with the register number of the vector
2782 index and its multiplicand. */
2783 static
2784 IRTemp disAVSIBMode ( /*OUT*/Int* len,
2785 const VexAbiInfo* vbi, Prefix pfx, Long delta,
2786 /*OUT*/HChar* buf, /*OUT*/UInt* rI,
2787 IRType ty, /*OUT*/Int* vscale )
2789 UChar mod_reg_rm = getUChar(delta);
2790 const HChar *vindex;
2792 *len = 0;
2793 *rI = 0;
2794 *vscale = 0;
2795 buf[0] = (UChar)0;
2796 if ((mod_reg_rm & 7) != 4 || epartIsReg(mod_reg_rm))
2797 return IRTemp_INVALID;
2799 UChar sib = getUChar(delta+1);
2800 UChar scale = toUChar((sib >> 6) & 3);
2801 UChar index_r = toUChar((sib >> 3) & 7);
2802 UChar base_r = toUChar(sib & 7);
2803 Long d = 0;
2804 /* correct since #(R13) == 8 + #(RBP) */
2805 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2806 delta += 2;
2807 *len = 2;
2809 *rI = index_r | (getRexX(pfx) << 3);
2810 if (ty == Ity_V128)
2811 vindex = nameXMMReg(*rI);
2812 else
2813 vindex = nameYMMReg(*rI);
2814 *vscale = 1<<scale;
2816 switch (mod_reg_rm >> 6) {
2817 case 0:
2818 if (base_is_BPor13) {
2819 d = getSDisp32(delta);
2820 *len += 4;
2821 if (scale == 0) {
2822 DIS(buf, "%s%lld(,%s)", segRegTxt(pfx), d, vindex);
2823 } else {
2824 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d, vindex, 1<<scale);
2826 return disAMode_copy2tmp( mkU64(d) );
2827 } else {
2828 if (scale == 0) {
2829 DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
2830 nameIRegRexB(8,pfx,base_r), vindex);
2831 } else {
2832 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
2833 nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
2836 break;
2837 case 1:
2838 d = getSDisp8(delta);
2839 *len += 1;
2840 goto have_disp;
2841 case 2:
2842 d = getSDisp32(delta);
2843 *len += 4;
2844 have_disp:
2845 if (scale == 0) {
2846 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2847 nameIRegRexB(8,pfx,base_r), vindex);
2848 } else {
2849 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2850 nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
2852 break;
2855 if (!d)
2856 return disAMode_copy2tmp( getIRegRexB(8,pfx,base_r) );
2857 return disAMode_copy2tmp( binop(Iop_Add64, getIRegRexB(8,pfx,base_r),
2858 mkU64(d)) );
2862 /* Figure out the number of (insn-stream) bytes constituting the amode
2863    beginning at delta.  This is useful for getting hold of literals beyond
2864 the end of the amode before it has been disassembled. */
2866 static UInt lengthAMode ( Prefix pfx, Long delta )
2868 UChar mod_reg_rm = getUChar(delta);
2869 delta++;
2871 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2872 jump table seems a bit excessive.
2874 mod_reg_rm &= 0xC7; /* is now XX000YYY */
2875 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2876 /* is now XX0XXYYY */
2877 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
2878 switch (mod_reg_rm) {
2880 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2881 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2883 case 0x00: case 0x01: case 0x02: case 0x03:
2884 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2885 return 1;
2887 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2888 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2890 case 0x08: case 0x09: case 0x0A: case 0x0B:
2891 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2892 return 2;
2894 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2895 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2897 case 0x10: case 0x11: case 0x12: case 0x13:
2898 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2899 return 5;
2901 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2902    /* REX.B==1: a register, %r8 .. %r15.  This shouldn't happen. */
2903 /* Not an address, but still handled. */
2904 case 0x18: case 0x19: case 0x1A: case 0x1B:
2905 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2906 return 1;
2908 /* RIP + disp32. */
2909 case 0x05:
2910 return 5;
2912 case 0x04: {
2913 /* SIB, with no displacement. */
2914 UChar sib = getUChar(delta);
2915 UChar base_r = toUChar(sib & 7);
2916 /* correct since #(R13) == 8 + #(RBP) */
2917 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2919 if (base_is_BPor13) {
2920 return 6;
2921 } else {
2922 return 2;
2926 /* SIB, with 8-bit displacement. */
2927 case 0x0C:
2928 return 3;
2930 /* SIB, with 32-bit displacement. */
2931 case 0x14:
2932 return 6;
2934 default:
2935 vpanic("lengthAMode(amd64)");
2936 return 0; /*notreached*/
2941 /*------------------------------------------------------------*/
2942 /*--- Disassembling common idioms ---*/
2943 /*------------------------------------------------------------*/
2945 typedef
2946 enum { WithFlagNone=2, WithFlagCarry, WithFlagCarryX, WithFlagOverX }
2947 WithFlag;
2949 /* Handle binary integer instructions of the form
2950 op E, G meaning
2951 op reg-or-mem, reg
2952    Is passed a pointer to the modRM byte, the actual operation, and the
2953 data size. Returns the address advanced completely over this
2954 instruction.
2956 E(src) is reg-or-mem
2957 G(dst) is reg.
2959 If E is reg, --> GET %G, tmp
2960 OP %E, tmp
2961 PUT tmp, %G
2963 If E is mem and OP is not reversible,
2964 --> (getAddr E) -> tmpa
2965 LD (tmpa), tmpa
2966 GET %G, tmp2
2967 OP tmpa, tmp2
2968 PUT tmp2, %G
2970 If E is mem and OP is reversible
2971 --> (getAddr E) -> tmpa
2972 LD (tmpa), tmpa
2973 OP %G, tmpa
2974 PUT tmpa, %G
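
   Illustrative example: an "xor reg,reg" that reaches this routine
   (i.e. one encoded in the E,G form) has E and G naming the same
   register.  The special case below first writes zero to that register
   and only then performs the operation, so the result and the flags
   thunk are still correct, but without a data dependency on the old
   register value that could trip up downstream value-tracking tools.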
2976 static
2977 ULong dis_op2_E_G ( const VexAbiInfo* vbi,
2978 Prefix pfx,
2979 IROp op8,
2980 WithFlag flag,
2981 Bool keep,
2982 Int size,
2983 Long delta0,
2984 const HChar* t_amd64opc )
2986 HChar dis_buf[50];
2987 Int len;
2988 IRType ty = szToITy(size);
2989 IRTemp dst1 = newTemp(ty);
2990 IRTemp src = newTemp(ty);
2991 IRTemp dst0 = newTemp(ty);
2992 UChar rm = getUChar(delta0);
2993 IRTemp addr = IRTemp_INVALID;
2995 /* Stay sane -- check for valid (op8, flag, keep) combinations. */
2996 switch (op8) {
2997 case Iop_Add8:
2998 switch (flag) {
2999 case WithFlagNone: case WithFlagCarry:
3000 case WithFlagCarryX: case WithFlagOverX:
3001 vassert(keep);
3002 break;
3003 default:
3004 vassert(0);
3006 break;
3007 case Iop_Sub8:
3008 vassert(flag == WithFlagNone || flag == WithFlagCarry);
3009 if (flag == WithFlagCarry) vassert(keep);
3010 break;
3011 case Iop_And8:
3012 vassert(flag == WithFlagNone);
3013 break;
3014 case Iop_Or8: case Iop_Xor8:
3015 vassert(flag == WithFlagNone);
3016 vassert(keep);
3017 break;
3018 default:
3019 vassert(0);
3022 if (epartIsReg(rm)) {
3023 /* Specially handle XOR reg,reg, because that doesn't really
3024 depend on reg, and doing the obvious thing potentially
3025 generates a spurious value check failure due to the bogus
3026 dependency. Ditto SUB/SBB reg,reg. */
3027 if ((op8 == Iop_Xor8 || ((op8 == Iop_Sub8) && keep))
3028 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
3029 putIRegG(size,pfx,rm, mkU(ty,0));
3032 assign( dst0, getIRegG(size,pfx,rm) );
3033 assign( src, getIRegE(size,pfx,rm) );
3035 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3036 helper_ADC( size, dst1, dst0, src,
3037 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3038 putIRegG(size, pfx, rm, mkexpr(dst1));
3039 } else
3040 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3041 helper_SBB( size, dst1, dst0, src,
3042 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3043 putIRegG(size, pfx, rm, mkexpr(dst1));
3044 } else
3045 if (op8 == Iop_Add8 && flag == WithFlagCarryX) {
3046 helper_ADCX_ADOX( True/*isADCX*/, size, dst1, dst0, src );
3047 putIRegG(size, pfx, rm, mkexpr(dst1));
3048 } else
3049 if (op8 == Iop_Add8 && flag == WithFlagOverX) {
3050 helper_ADCX_ADOX( False/*!isADCX*/, size, dst1, dst0, src );
3051 putIRegG(size, pfx, rm, mkexpr(dst1));
3052 } else {
3053 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3054 if (isAddSub(op8))
3055 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3056 else
3057 setFlags_DEP1(op8, dst1, ty);
3058 if (keep)
3059 putIRegG(size, pfx, rm, mkexpr(dst1));
3062 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3063 nameIRegE(size,pfx,rm),
3064 nameIRegG(size,pfx,rm));
3065 return 1+delta0;
3066 } else {
3067 /* E refers to memory */
3068 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3069 assign( dst0, getIRegG(size,pfx,rm) );
3070 assign( src, loadLE(szToITy(size), mkexpr(addr)) );
3072 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3073 helper_ADC( size, dst1, dst0, src,
3074 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3075 putIRegG(size, pfx, rm, mkexpr(dst1));
3076 } else
3077 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3078 helper_SBB( size, dst1, dst0, src,
3079 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3080 putIRegG(size, pfx, rm, mkexpr(dst1));
3081 } else
3082 if (op8 == Iop_Add8 && flag == WithFlagCarryX) {
3083 helper_ADCX_ADOX( True/*isADCX*/, size, dst1, dst0, src );
3084 putIRegG(size, pfx, rm, mkexpr(dst1));
3085 } else
3086 if (op8 == Iop_Add8 && flag == WithFlagOverX) {
3087 helper_ADCX_ADOX( False/*!isADCX*/, size, dst1, dst0, src );
3088 putIRegG(size, pfx, rm, mkexpr(dst1));
3089 } else {
3090 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3091 if (isAddSub(op8))
3092 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3093 else
3094 setFlags_DEP1(op8, dst1, ty);
3095 if (keep)
3096 putIRegG(size, pfx, rm, mkexpr(dst1));
3099 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3100 dis_buf, nameIRegG(size, pfx, rm));
3101 return len+delta0;
3107 /* Handle binary integer instructions of the form
3108 op G, E meaning
3109 op reg, reg-or-mem
3110    Is passed a pointer to the modRM byte, the actual operation, and the
3111 data size. Returns the address advanced completely over this
3112 instruction.
3114 G(src) is reg.
3115 E(dst) is reg-or-mem
3117 If E is reg, --> GET %E, tmp
3118 OP %G, tmp
3119 PUT tmp, %E
3121 If E is mem, --> (getAddr E) -> tmpa
3122 LD (tmpa), tmpv
3123 OP %G, tmpv
3124 ST tmpv, (tmpa)
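
   Illustrative example: for a memory destination carrying a LOCK
   prefix, e.g. "lock addl %eax,(%rdi)", the store below is performed
   as a compare-and-swap, using the originally loaded value (dst0) as
   the expected value and guest_RIP_curr_instr as the restart point, so
   the whole read-modify-write is retried if the location changed in
   between.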
3126 static
3127 ULong dis_op2_G_E ( const VexAbiInfo* vbi,
3128 Prefix pfx,
3129 IROp op8,
3130 WithFlag flag,
3131 Bool keep,
3132 Int size,
3133 Long delta0,
3134 const HChar* t_amd64opc )
3136 HChar dis_buf[50];
3137 Int len;
3138 IRType ty = szToITy(size);
3139 IRTemp dst1 = newTemp(ty);
3140 IRTemp src = newTemp(ty);
3141 IRTemp dst0 = newTemp(ty);
3142 UChar rm = getUChar(delta0);
3143 IRTemp addr = IRTemp_INVALID;
3145 /* Stay sane -- check for valid (op8, flag, keep) combinations. */
3146 switch (op8) {
3147 case Iop_Add8:
3148 vassert(flag == WithFlagNone || flag == WithFlagCarry);
3149 vassert(keep);
3150 break;
3151 case Iop_Sub8:
3152 vassert(flag == WithFlagNone || flag == WithFlagCarry);
3153 if (flag == WithFlagCarry) vassert(keep);
3154 break;
3155 case Iop_And8: case Iop_Or8: case Iop_Xor8:
3156 vassert(flag == WithFlagNone);
3157 vassert(keep);
3158 break;
3159 default:
3160 vassert(0);
3163 /* flag != WithFlagNone is only allowed for Add and Sub and indicates the
3164 intended operation is add-with-carry or subtract-with-borrow. */
3166 if (epartIsReg(rm)) {
3167 /* Specially handle XOR reg,reg, because that doesn't really
3168 depend on reg, and doing the obvious thing potentially
3169 generates a spurious value check failure due to the bogus
3170 dependency. Ditto SUB/SBB reg,reg. */
3171 if ((op8 == Iop_Xor8 || ((op8 == Iop_Sub8) && keep))
3172 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
3173 putIRegE(size,pfx,rm, mkU(ty,0));
3176 assign(dst0, getIRegE(size,pfx,rm));
3177 assign(src, getIRegG(size,pfx,rm));
3179 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3180 helper_ADC( size, dst1, dst0, src,
3181 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3182 putIRegE(size, pfx, rm, mkexpr(dst1));
3183 } else
3184 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3185 helper_SBB( size, dst1, dst0, src,
3186 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3187 putIRegE(size, pfx, rm, mkexpr(dst1));
3188 } else {
3189 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3190 if (isAddSub(op8))
3191 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3192 else
3193 setFlags_DEP1(op8, dst1, ty);
3194 if (keep)
3195 putIRegE(size, pfx, rm, mkexpr(dst1));
3198 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3199 nameIRegG(size,pfx,rm),
3200 nameIRegE(size,pfx,rm));
3201 return 1+delta0;
3204 /* E refers to memory */
3206 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3207 assign(dst0, loadLE(ty,mkexpr(addr)));
3208 assign(src, getIRegG(size,pfx,rm));
3210 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3211 if (haveLOCK(pfx)) {
3212 /* cas-style store */
3213 helper_ADC( size, dst1, dst0, src,
3214 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3215 } else {
3216 /* normal store */
3217 helper_ADC( size, dst1, dst0, src,
3218 /*store*/addr, IRTemp_INVALID, 0 );
3220 } else
3221 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3222 if (haveLOCK(pfx)) {
3223 /* cas-style store */
3224 helper_SBB( size, dst1, dst0, src,
3225 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3226 } else {
3227 /* normal store */
3228 helper_SBB( size, dst1, dst0, src,
3229 /*store*/addr, IRTemp_INVALID, 0 );
3231 } else {
3232 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3233 if (keep) {
3234 if (haveLOCK(pfx)) {
3235 if (0) vex_printf("locked case\n" );
3236 casLE( mkexpr(addr),
3237 mkexpr(dst0)/*expval*/,
3238 mkexpr(dst1)/*newval*/, guest_RIP_curr_instr );
3239 } else {
3240 if (0) vex_printf("nonlocked case\n");
3241 storeLE(mkexpr(addr), mkexpr(dst1));
3244 if (isAddSub(op8))
3245 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3246 else
3247 setFlags_DEP1(op8, dst1, ty);
3250 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3251 nameIRegG(size,pfx,rm), dis_buf);
3252 return len+delta0;
3257 /* Handle move instructions of the form
3258 mov E, G meaning
3259 mov reg-or-mem, reg
3260    Is passed a pointer to the modRM byte, and the data size.  Returns
3261 the address advanced completely over this instruction.
3263 E(src) is reg-or-mem
3264 G(dst) is reg.
3266 If E is reg, --> GET %E, tmpv
3267 PUT tmpv, %G
3269 If E is mem --> (getAddr E) -> tmpa
3270 LD (tmpa), tmpb
3271 PUT tmpb, %G
3273 static
3274 ULong dis_mov_E_G ( const VexAbiInfo* vbi,
3275 Prefix pfx,
3276 Int size,
3277 Long delta0 )
3279 Int len;
3280 UChar rm = getUChar(delta0);
3281 HChar dis_buf[50];
3283 if (epartIsReg(rm)) {
3284 putIRegG(size, pfx, rm, getIRegE(size, pfx, rm));
3285 DIP("mov%c %s,%s\n", nameISize(size),
3286 nameIRegE(size,pfx,rm),
3287 nameIRegG(size,pfx,rm));
3288 return 1+delta0;
3291 /* E refers to memory */
3293 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3294 putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr)));
3295 DIP("mov%c %s,%s\n", nameISize(size),
3296 dis_buf,
3297 nameIRegG(size,pfx,rm));
3298 return delta0+len;
3303 /* Handle move instructions of the form
3304 mov G, E meaning
3305 mov reg, reg-or-mem
3306    Is passed a pointer to the modRM byte, and the data size.  Returns
3307 the address advanced completely over this instruction.
3308 We have to decide here whether F2 or F3 are acceptable. F2 never is.
3310 G(src) is reg.
3311 E(dst) is reg-or-mem
3313 If E is reg, --> GET %G, tmp
3314 PUT tmp, %E
3316 If E is mem, --> (getAddr E) -> tmpa
3317 GET %G, tmpv
3318 ST tmpv, (tmpa)
3320 static
3321 ULong dis_mov_G_E ( const VexAbiInfo* vbi,
3322 Prefix pfx,
3323 Int size,
3324 Long delta0,
3325 /*OUT*/Bool* ok )
3327 Int len;
3328 UChar rm = getUChar(delta0);
3329 HChar dis_buf[50];
3331 *ok = True;
3333 if (epartIsReg(rm)) {
3334 if (haveF2orF3(pfx)) { *ok = False; return delta0; }
3335 putIRegE(size, pfx, rm, getIRegG(size, pfx, rm));
3336 DIP("mov%c %s,%s\n", nameISize(size),
3337 nameIRegG(size,pfx,rm),
3338 nameIRegE(size,pfx,rm));
3339 return 1+delta0;
3342 /* E refers to memory */
3344 if (haveF2(pfx)) { *ok = False; return delta0; }
3345 /* F3(XRELEASE) is acceptable, though. */
3346 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3347 storeLE( mkexpr(addr), getIRegG(size, pfx, rm) );
3348 DIP("mov%c %s,%s\n", nameISize(size),
3349 nameIRegG(size,pfx,rm),
3350 dis_buf);
3351 return len+delta0;
3356 /* op $immediate, AL/AX/EAX/RAX. */
3357 static
3358 ULong dis_op_imm_A ( Int size,
3359 Bool carrying,
3360 IROp op8,
3361 Bool keep,
3362 Long delta,
3363 const HChar* t_amd64opc )
3365 Int size4 = imin(size,4);
3366 IRType ty = szToITy(size);
3367 IRTemp dst0 = newTemp(ty);
3368 IRTemp src = newTemp(ty);
3369 IRTemp dst1 = newTemp(ty);
3370 Long lit = getSDisp(size4,delta);
3371 assign(dst0, getIRegRAX(size));
3372 assign(src, mkU(ty,lit & mkSizeMask(size)));
3374 if (isAddSub(op8) && !carrying) {
3375 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3376 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3378 else
3379 if (isLogic(op8)) {
3380 vassert(!carrying);
3381 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3382 setFlags_DEP1(op8, dst1, ty);
3384 else
3385 if (op8 == Iop_Add8 && carrying) {
3386 helper_ADC( size, dst1, dst0, src,
3387 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3389 else
3390 if (op8 == Iop_Sub8 && carrying) {
3391 helper_SBB( size, dst1, dst0, src,
3392 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3394 else
3395 vpanic("dis_op_imm_A(amd64,guest)");
3397 if (keep)
3398 putIRegRAX(size, mkexpr(dst1));
3400 DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size),
3401 lit, nameIRegRAX(size));
3402 return delta+size4;
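   /* Illustrative example: for "addq $imm32,%rax" the operand size is 8
      but size4 is 4, so getSDisp reads a 4-byte immediate and
      sign-extends it, and the mkSizeMask(size) masking keeps the full
      64-bit sign-extension -- matching the architectural rule that the
      64-bit forms of these ops take a sign-extended imm32. */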
3406 /* Sign- and Zero-extending moves. */
3407 static
3408 ULong dis_movx_E_G ( const VexAbiInfo* vbi,
3409 Prefix pfx,
3410 Long delta, Int szs, Int szd, Bool sign_extend )
3412 UChar rm = getUChar(delta);
3413 if (epartIsReg(rm)) {
3414 putIRegG(szd, pfx, rm,
3415 doScalarWidening(
3416 szs,szd,sign_extend,
3417 getIRegE(szs,pfx,rm)));
3418 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3419 nameISize(szs),
3420 nameISize(szd),
3421 nameIRegE(szs,pfx,rm),
3422 nameIRegG(szd,pfx,rm));
3423 return 1+delta;
3426 /* E refers to memory */
3428 Int len;
3429 HChar dis_buf[50];
3430 IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
3431 putIRegG(szd, pfx, rm,
3432 doScalarWidening(
3433 szs,szd,sign_extend,
3434 loadLE(szToITy(szs),mkexpr(addr))));
3435 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3436 nameISize(szs),
3437 nameISize(szd),
3438 dis_buf,
3439 nameIRegG(szd,pfx,rm));
3440 return len+delta;
3445 /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
3446 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */
3447 static
3448 void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
3450 /* special-case the 64-bit case */
3451 if (sz == 8) {
3452 IROp op = signed_divide ? Iop_DivModS128to64
3453 : Iop_DivModU128to64;
3454 IRTemp src128 = newTemp(Ity_I128);
3455 IRTemp dst128 = newTemp(Ity_I128);
3456 assign( src128, binop(Iop_64HLto128,
3457 getIReg64(R_RDX),
3458 getIReg64(R_RAX)) );
3459 assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) );
3460 putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) );
3461 putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) );
3462 } else {
3463 IROp op = signed_divide ? Iop_DivModS64to32
3464 : Iop_DivModU64to32;
3465 IRTemp src64 = newTemp(Ity_I64);
3466 IRTemp dst64 = newTemp(Ity_I64);
3467 switch (sz) {
3468 case 4:
3469 assign( src64,
3470 binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) );
3471 assign( dst64,
3472 binop(op, mkexpr(src64), mkexpr(t)) );
3473 putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) );
3474 putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) );
3475 break;
3476 case 2: {
3477 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3478 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3479 assign( src64, unop(widen3264,
3480 binop(Iop_16HLto32,
3481 getIRegRDX(2),
3482 getIRegRAX(2))) );
3483 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
3484 putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
3485 putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
3486 break;
3488 case 1: {
3489 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3490 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3491 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16;
3492 assign( src64, unop(widen3264,
3493 unop(widen1632, getIRegRAX(2))) );
3494 assign( dst64,
3495 binop(op, mkexpr(src64),
3496 unop(widen1632, unop(widen816, mkexpr(t)))) );
3497 putIRegRAX( 1, unop(Iop_16to8,
3498 unop(Iop_32to16,
3499 unop(Iop_64to32,mkexpr(dst64)))) );
3500 putIRegAH( unop(Iop_16to8,
3501 unop(Iop_32to16,
3502 unop(Iop_64HIto32,mkexpr(dst64)))) );
3503 break;
3505 default:
3506 vpanic("codegen_div(amd64)");
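/* Illustrative example: for a 32-bit divide, src64 is EDX:EAX glued
   together with Iop_32HLto64 and the divide is Iop_DivMod{S,U}64to32,
   whose 64-bit result carries the quotient in its low half and the
   remainder in its high half -- hence the quotient lands in %eax and
   the remainder in %edx, as the architecture requires. */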
3511 static
3512 ULong dis_Grp1 ( const VexAbiInfo* vbi,
3513 Prefix pfx,
3514 Long delta, UChar modrm,
3515 Int am_sz, Int d_sz, Int sz, Long d64 )
3517 Int len;
3518 HChar dis_buf[50];
3519 IRType ty = szToITy(sz);
3520 IRTemp dst1 = newTemp(ty);
3521 IRTemp src = newTemp(ty);
3522 IRTemp dst0 = newTemp(ty);
3523 IRTemp addr = IRTemp_INVALID;
3524 IROp op8 = Iop_INVALID;
3525 ULong mask = mkSizeMask(sz);
3527 switch (gregLO3ofRM(modrm)) {
3528 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break;
3529 case 2: break; // ADC
3530 case 3: break; // SBB
3531 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break;
3532 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break;
3533 /*NOTREACHED*/
3534 default: vpanic("dis_Grp1(amd64): unhandled case");
3537 if (epartIsReg(modrm)) {
3538 vassert(am_sz == 1);
3540 assign(dst0, getIRegE(sz,pfx,modrm));
3541 assign(src, mkU(ty,d64 & mask));
3543 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
3544 helper_ADC( sz, dst1, dst0, src,
3545 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3546 } else
3547 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
3548 helper_SBB( sz, dst1, dst0, src,
3549 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3550 } else {
3551 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3552 if (isAddSub(op8))
3553 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3554 else
3555 setFlags_DEP1(op8, dst1, ty);
3558 if (gregLO3ofRM(modrm) < 7)
3559 putIRegE(sz, pfx, modrm, mkexpr(dst1));
3561 delta += (am_sz + d_sz);
3562 DIP("%s%c $%lld, %s\n",
3563 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64,
3564 nameIRegE(sz,pfx,modrm));
3565 } else {
3566 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
3568 assign(dst0, loadLE(ty,mkexpr(addr)));
3569 assign(src, mkU(ty,d64 & mask));
3571 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
3572 if (haveLOCK(pfx)) {
3573 /* cas-style store */
3574 helper_ADC( sz, dst1, dst0, src,
3575 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3576 } else {
3577 /* normal store */
3578 helper_ADC( sz, dst1, dst0, src,
3579 /*store*/addr, IRTemp_INVALID, 0 );
3581 } else
3582 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
3583 if (haveLOCK(pfx)) {
3584 /* cas-style store */
3585 helper_SBB( sz, dst1, dst0, src,
3586 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3587 } else {
3588 /* normal store */
3589 helper_SBB( sz, dst1, dst0, src,
3590 /*store*/addr, IRTemp_INVALID, 0 );
3592 } else {
3593 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3594 if (gregLO3ofRM(modrm) < 7) {
3595 if (haveLOCK(pfx)) {
3596 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
3597 mkexpr(dst1)/*newVal*/,
3598 guest_RIP_curr_instr );
3599 } else {
3600 storeLE(mkexpr(addr), mkexpr(dst1));
3603 if (isAddSub(op8))
3604 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3605 else
3606 setFlags_DEP1(op8, dst1, ty);
3609 delta += (len+d_sz);
3610 DIP("%s%c $%lld, %s\n",
3611 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz),
3612 d64, dis_buf);
3614 return delta;
3618 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed
3619 expression. */
3621 static
3622 ULong dis_Grp2 ( const VexAbiInfo* vbi,
3623 Prefix pfx,
3624 Long delta, UChar modrm,
3625 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
3626 const HChar* shift_expr_txt, Bool* decode_OK )
3628 /* delta on entry points at the modrm byte. */
3629 HChar dis_buf[50];
3630 Int len;
3631 Bool isShift, isRotate, isRotateC;
3632 IRType ty = szToITy(sz);
3633 IRTemp dst0 = newTemp(ty);
3634 IRTemp dst1 = newTemp(ty);
3635 IRTemp addr = IRTemp_INVALID;
3637 *decode_OK = True;
3639 vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
3641 /* Put value to shift/rotate in dst0. */
3642 if (epartIsReg(modrm)) {
3643 assign(dst0, getIRegE(sz, pfx, modrm));
3644 delta += (am_sz + d_sz);
3645 } else {
3646 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
3647 assign(dst0, loadLE(ty,mkexpr(addr)));
3648 delta += len + d_sz;
3651 isShift = False;
3652 switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }
3654 isRotate = False;
3655 switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; }
3657 isRotateC = False;
3658 switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; }
3660 if (!isShift && !isRotate && !isRotateC) {
3661 /*NOTREACHED*/
3662 vpanic("dis_Grp2(Reg): unhandled case(amd64)");
3665 if (isRotateC) {
3666 /* Call a helper; this insn is so ridiculous it does not deserve
3667 better. One problem is, the helper has to calculate both the
3668 new value and the new flags. This is more than 64 bits, and
3669 there is no way to return more than 64 bits from the helper.
3670 Hence the crude and obvious solution is to call it twice,
3671 using the sign of the sz field to indicate whether it is the
3672 value or rflags result we want.
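
      Illustrative example: "rclw %cl,%ax" (sz == 2) therefore turns
      into two calls to amd64g_calculate_RCL -- one with a final
      argument of 2 to get the rotated value and one with -2 to get the
      new rflags -- exactly the sign convention described above.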
3674 Bool left = toBool(gregLO3ofRM(modrm) == 2);
3675 IRExpr** argsVALUE;
3676 IRExpr** argsRFLAGS;
3678 IRTemp new_value = newTemp(Ity_I64);
3679 IRTemp new_rflags = newTemp(Ity_I64);
3680 IRTemp old_rflags = newTemp(Ity_I64);
3682 assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) );
3684 argsVALUE
3685 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3686 widenUto64(shift_expr), /* rotate amount */
3687 mkexpr(old_rflags),
3688 mkU64(sz) );
3689 assign( new_value,
3690 mkIRExprCCall(
3691 Ity_I64,
3692 0/*regparm*/,
3693 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3694 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
3695 argsVALUE
3699 argsRFLAGS
3700 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3701 widenUto64(shift_expr), /* rotate amount */
3702 mkexpr(old_rflags),
3703 mkU64(-sz) );
3704 assign( new_rflags,
3705 mkIRExprCCall(
3706 Ity_I64,
3707 0/*regparm*/,
3708 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3709 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
3710 argsRFLAGS
3714 assign( dst1, narrowTo(ty, mkexpr(new_value)) );
3715 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
3716 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
3717 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
3718 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
3721 else
3722 if (isShift) {
3724 IRTemp pre64 = newTemp(Ity_I64);
3725 IRTemp res64 = newTemp(Ity_I64);
3726 IRTemp res64ss = newTemp(Ity_I64);
3727 IRTemp shift_amt = newTemp(Ity_I8);
3728 UChar mask = toUChar(sz==8 ? 63 : 31);
3729 IROp op64;
3731 switch (gregLO3ofRM(modrm)) {
3732 case 4: op64 = Iop_Shl64; break;
3733 case 5: op64 = Iop_Shr64; break;
3734 case 6: op64 = Iop_Shl64; break;
3735 case 7: op64 = Iop_Sar64; break;
3736 /*NOTREACHED*/
3737 default: vpanic("dis_Grp2:shift"); break;
3740 /* Widen the value to be shifted to 64 bits, do the shift, and
3741 narrow back down. This seems surprisingly long-winded, but
3742 unfortunately the AMD semantics requires that 8/16/32-bit
3743 shifts give defined results for shift values all the way up
3744 to 32, and this seems the simplest way to do it. It has the
3745 advantage that the only IR level shifts generated are of 64
3746 bit values, and the shift amount is guaranteed to be in the
3747 range 0 .. 63, thereby observing the IR semantics requiring
3748 all shift values to be in the range 0 .. 2^word_size-1.
3750 Therefore the shift amount is masked with 63 for 64-bit shifts
3751 and 31 for all others.
3753 /* shift_amt = shift_expr & MASK, regardless of operation size */
3754 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) );
3756 /* suitably widen the value to be shifted to 64 bits. */
3757 assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0))
3758 : widenUto64(mkexpr(dst0)) );
3760 /* res64 = pre64 `shift` shift_amt */
3761 assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) );
3763 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
3764 assign( res64ss,
3765 binop(op64,
3766 mkexpr(pre64),
3767 binop(Iop_And8,
3768 binop(Iop_Sub8,
3769 mkexpr(shift_amt), mkU8(1)),
3770 mkU8(mask))) );
3772 /* Build the flags thunk. */
3773 setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt);
3775 /* Narrow the result back down. */
3776 assign( dst1, narrowTo(ty, mkexpr(res64)) );
3778 } /* if (isShift) */
3780 else
3781 if (isRotate) {
3782 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1
3783 : (ty==Ity_I32 ? 2 : 3));
3784 Bool left = toBool(gregLO3ofRM(modrm) == 0);
3785 IRTemp rot_amt = newTemp(Ity_I8);
3786 IRTemp rot_amt64 = newTemp(Ity_I8);
3787 IRTemp oldFlags = newTemp(Ity_I64);
3788 UChar mask = toUChar(sz==8 ? 63 : 31);
3790 /* rot_amt = shift_expr & mask */
3791 /* By masking the rotate amount thusly, the IR-level Shl/Shr
3792 expressions never shift beyond the word size and thus remain
3793 well defined. */
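/* Illustrative sketch only, excluded from the build: a 32-bit
   rotate-left built from the same two-shift-and-OR pattern generated
   below.  The name ref_rol32 is invented for illustration. */
#if 0
static UInt ref_rol32 ( UInt x, UInt amt )
{
   amt &= 31;                     /* keep both shift counts below the word size */
   if (amt == 0) return x;        /* avoid a shift by 32 in the second term     */
   return (x << amt) | (x >> (32 - amt));
}
#endif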
3794 assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask)));
3796 if (ty == Ity_I64)
3797 assign(rot_amt, mkexpr(rot_amt64));
3798 else
3799 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1)));
3801 if (left) {
3803 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
3804 assign(dst1,
3805 binop( mkSizedOp(ty,Iop_Or8),
3806 binop( mkSizedOp(ty,Iop_Shl8),
3807 mkexpr(dst0),
3808 mkexpr(rot_amt)
3810 binop( mkSizedOp(ty,Iop_Shr8),
3811 mkexpr(dst0),
3812 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3816 ccOp += AMD64G_CC_OP_ROLB;
3818 } else { /* right */
3820 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
3821 assign(dst1,
3822 binop( mkSizedOp(ty,Iop_Or8),
3823 binop( mkSizedOp(ty,Iop_Shr8),
3824 mkexpr(dst0),
3825 mkexpr(rot_amt)
3827 binop( mkSizedOp(ty,Iop_Shl8),
3828 mkexpr(dst0),
3829 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3833 ccOp += AMD64G_CC_OP_RORB;
3837 /* dst1 now holds the rotated value. Build flag thunk. We
3838 need the resulting value for this, and the previous flags.
3839 Except don't set it if the rotate count is zero. */
3841 assign(oldFlags, mk_amd64g_calculate_rflags_all());
3843 /* rot_amt64 :: Ity_I8. We need to convert it to I1. */
3844 IRTemp rot_amt64b = newTemp(Ity_I1);
3845 assign(rot_amt64b, binop(Iop_CmpNE8, mkexpr(rot_amt64), mkU8(0)) );
3847 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
3848 stmt( IRStmt_Put( OFFB_CC_OP,
3849 IRExpr_ITE( mkexpr(rot_amt64b),
3850 mkU64(ccOp),
3851 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
3852 stmt( IRStmt_Put( OFFB_CC_DEP1,
3853 IRExpr_ITE( mkexpr(rot_amt64b),
3854 widenUto64(mkexpr(dst1)),
3855 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
3856 stmt( IRStmt_Put( OFFB_CC_DEP2,
3857 IRExpr_ITE( mkexpr(rot_amt64b),
3858 mkU64(0),
3859 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
3860 stmt( IRStmt_Put( OFFB_CC_NDEP,
3861 IRExpr_ITE( mkexpr(rot_amt64b),
3862 mkexpr(oldFlags),
3863 IRExpr_Get(OFFB_CC_NDEP,Ity_I64) ) ));
3864 } /* if (isRotate) */
3866 /* Save result, and finish up. */
3867 if (epartIsReg(modrm)) {
3868 putIRegE(sz, pfx, modrm, mkexpr(dst1));
3869 if (vex_traceflags & VEX_TRACE_FE) {
3870 vex_printf("%s%c ",
3871 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
3872 if (shift_expr_txt)
3873 vex_printf("%s", shift_expr_txt);
3874 else
3875 ppIRExpr(shift_expr);
3876 vex_printf(", %s\n", nameIRegE(sz,pfx,modrm));
3878 } else {
3879 storeLE(mkexpr(addr), mkexpr(dst1));
3880 if (vex_traceflags & VEX_TRACE_FE) {
3881 vex_printf("%s%c ",
3882 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
3883 if (shift_expr_txt)
3884 vex_printf("%s", shift_expr_txt);
3885 else
3886 ppIRExpr(shift_expr);
3887 vex_printf(", %s\n", dis_buf);
3890 return delta;
3894 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
3895 static
3896 ULong dis_Grp8_Imm ( const VexAbiInfo* vbi,
3897 Prefix pfx,
3898 Long delta, UChar modrm,
3899 Int am_sz, Int sz, ULong src_val,
3900 Bool* decode_OK )
3902 /* src_val denotes a d8.
3903 And delta on entry points at the modrm byte. */
3905 IRType ty = szToITy(sz);
3906 IRTemp t2 = newTemp(Ity_I64);
3907 IRTemp t2m = newTemp(Ity_I64);
3908 IRTemp t_addr = IRTemp_INVALID;
3909 HChar dis_buf[50];
3910 ULong mask;
3912 /* we're optimists :-) */
3913 *decode_OK = True;
3915 /* Check whether F2 or F3 are acceptable. */
3916 if (epartIsReg(modrm)) {
3917 /* F2 or F3 are not allowed in the register case. */
3918 if (haveF2orF3(pfx)) {
3919 *decode_OK = False;
3920 return delta;
3922 } else {
3923 /* F2 or F3 (but not both) are allowable provided LOCK is also
3924 present. */
3925 if (haveF2orF3(pfx)) {
3926 if (haveF2andF3(pfx) || !haveLOCK(pfx)) {
3927 *decode_OK = False;
3928 return delta;
3933 /* Limit src_val -- the bit offset -- to something within a word.
3934 The Intel docs say that literal offsets larger than a word are
3935 masked in this way. */
3936 switch (sz) {
3937 case 2: src_val &= 15; break;
3938 case 4: src_val &= 31; break;
3939 case 8: src_val &= 63; break;
3940 default: *decode_OK = False; return delta;
3943 /* Invent a mask suitable for the operation. */
3944 switch (gregLO3ofRM(modrm)) {
3945 case 4: /* BT */ mask = 0; break;
3946 case 5: /* BTS */ mask = 1ULL << src_val; break;
3947 case 6: /* BTR */ mask = ~(1ULL << src_val); break;
3948 case 7: /* BTC */ mask = 1ULL << src_val; break;
3949 /* If this needs to be extended, probably simplest to make a
3950 new function to handle the other cases (0 .. 3). The
3951 Intel docs do not, however, indicate any use for 0 .. 3, so
3952 we don't expect this to happen. */
3953 default: *decode_OK = False; return delta;
3956 /* Fetch the value to be tested and modified into t2, which is
3957 64-bits wide regardless of sz. */
3958 if (epartIsReg(modrm)) {
3959 vassert(am_sz == 1);
3960 assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) );
3961 delta += (am_sz + 1);
3962 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
3963 nameISize(sz),
3964 src_val, nameIRegE(sz,pfx,modrm));
3965 } else {
3966 Int len;
3967 t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 );
3968 delta += (len+1);
3969 assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) );
3970 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
3971 nameISize(sz),
3972 src_val, dis_buf);
3975 /* Compute the new value into t2m, if non-BT. */
3976 switch (gregLO3ofRM(modrm)) {
3977 case 4: /* BT */
3978 break;
3979 case 5: /* BTS */
3980 assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) );
3981 break;
3982 case 6: /* BTR */
3983 assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) );
3984 break;
3985 case 7: /* BTC */
3986 assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
3987 break;
3988 default:
3989 /*NOTREACHED*/ /*the previous switch guards this*/
3990 vassert(0);
3993 /* Write the result back, if non-BT. */
3994 if (gregLO3ofRM(modrm) != 4 /* BT */) {
3995 if (epartIsReg(modrm)) {
3996 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
3997 } else {
3998 if (haveLOCK(pfx)) {
3999 casLE( mkexpr(t_addr),
4000 narrowTo(ty, mkexpr(t2))/*expd*/,
4001 narrowTo(ty, mkexpr(t2m))/*new*/,
4002 guest_RIP_curr_instr );
4003 } else {
4004 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
4009 /* Copy relevant bit from t2 into the carry flag. */
4010 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
4011 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
4012 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
4013 stmt( IRStmt_Put(
4014 OFFB_CC_DEP1,
4015 binop(Iop_And64,
4016 binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
4017 mkU64(1))
4019 /* Set NDEP even though it isn't used. This makes redundant-PUT
4020 elimination of previous stores to this field work better. */
4021 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
4023 return delta;
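/* Illustrative sketch only, excluded from the build: the effect of the
   BT family on a 64-bit value, using the same mask construction as
   above.  The name ref_btx and its out-parameter are invented for
   illustration. */
#if 0
static ULong ref_btx ( ULong v, UInt bitno, UInt gregLO3 /* 4 .. 7 */,
                       /*OUT*/UInt* carry )
{
   ULong bit = 1ULL << (bitno & 63);
   *carry = (UInt)((v >> (bitno & 63)) & 1);   /* tested bit -> CF */
   switch (gregLO3) {
      case 5:  return v |  bit;    /* BTS: set the bit    */
      case 6:  return v & ~bit;    /* BTR: clear the bit  */
      case 7:  return v ^  bit;    /* BTC: flip the bit   */
      default: return v;           /* BT: value unchanged */
   }
}
#endif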
4027 /* Signed/unsigned widening multiply. Generate IR to multiply the
4028 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
4029 RDX:RAX/EDX:EAX/DX:AX/AX. */
4031 static void codegen_mulL_A_D ( Int sz, Bool syned,
4032 IRTemp tmp, const HChar* tmp_txt )
4034 IRType ty = szToITy(sz);
4035 IRTemp t1 = newTemp(ty);
4037 assign( t1, getIRegRAX(sz) );
4039 switch (ty) {
4040 case Ity_I64: {
4041 IRTemp res128 = newTemp(Ity_I128);
4042 IRTemp resHi = newTemp(Ity_I64);
4043 IRTemp resLo = newTemp(Ity_I64);
4044 IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64;
4045 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4046 setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp );
4047 assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4048 assign( resHi, unop(Iop_128HIto64,mkexpr(res128)));
4049 assign( resLo, unop(Iop_128to64,mkexpr(res128)));
4050 putIReg64(R_RDX, mkexpr(resHi));
4051 putIReg64(R_RAX, mkexpr(resLo));
4052 break;
4054 case Ity_I32: {
4055 IRTemp res64 = newTemp(Ity_I64);
4056 IRTemp resHi = newTemp(Ity_I32);
4057 IRTemp resLo = newTemp(Ity_I32);
4058 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32;
4059 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4060 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
4061 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4062 assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
4063 assign( resLo, unop(Iop_64to32,mkexpr(res64)));
4064 putIRegRDX(4, mkexpr(resHi));
4065 putIRegRAX(4, mkexpr(resLo));
4066 break;
4068 case Ity_I16: {
4069 IRTemp res32 = newTemp(Ity_I32);
4070 IRTemp resHi = newTemp(Ity_I16);
4071 IRTemp resLo = newTemp(Ity_I16);
4072 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16;
4073 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4074 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
4075 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4076 assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
4077 assign( resLo, unop(Iop_32to16,mkexpr(res32)));
4078 putIRegRDX(2, mkexpr(resHi));
4079 putIRegRAX(2, mkexpr(resLo));
4080 break;
4082 case Ity_I8: {
4083 IRTemp res16 = newTemp(Ity_I16);
4084 IRTemp resHi = newTemp(Ity_I8);
4085 IRTemp resLo = newTemp(Ity_I8);
4086 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8;
4087 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4088 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
4089 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4090 assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
4091 assign( resLo, unop(Iop_16to8,mkexpr(res16)));
4092 putIRegRAX(2, mkexpr(res16));
4093 break;
4095 default:
4096 ppIRType(ty);
4097 vpanic("codegen_mulL_A_D(amd64)");
4099 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
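/* Illustrative sketch only, excluded from the build: the 32-bit case in
   plain C -- form the full 64-bit product, then split it into the high
   and low halves written to EDX and EAX.  The name ref_mulu32 is
   invented for illustration. */
#if 0
static void ref_mulu32 ( UInt a, UInt b, /*OUT*/UInt* hi, /*OUT*/UInt* lo )
{
   ULong prod = (ULong)a * (ULong)b;   /* same widening as Iop_MullU32 */
   *hi = (UInt)(prod >> 32);           /* -> EDX */
   *lo = (UInt)prod;                   /* -> EAX */
}
#endif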
4103 /* Group 3 extended opcodes. We have to decide here whether F2 and F3
4104 might be valid.*/
4105 static
4106 ULong dis_Grp3 ( const VexAbiInfo* vbi,
4107 Prefix pfx, Int sz, Long delta, Bool* decode_OK )
4109 Long d64;
4110 UChar modrm;
4111 HChar dis_buf[50];
4112 Int len;
4113 IRTemp addr;
4114 IRType ty = szToITy(sz);
4115 IRTemp t1 = newTemp(ty);
4116 IRTemp dst1, src, dst0;
4117 *decode_OK = True;
4118 modrm = getUChar(delta);
4119 if (epartIsReg(modrm)) {
4120 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4121 if (haveF2orF3(pfx)) goto unhandled;
4122 switch (gregLO3ofRM(modrm)) {
4123 case 0: { /* TEST */
4124 delta++;
4125 d64 = getSDisp(imin(4,sz), delta);
4126 delta += imin(4,sz);
4127 dst1 = newTemp(ty);
4128 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
4129 getIRegE(sz,pfx,modrm),
4130 mkU(ty, d64 & mkSizeMask(sz))));
4131 setFlags_DEP1( Iop_And8, dst1, ty );
4132 DIP("test%c $%lld, %s\n",
4133 nameISize(sz), d64,
4134 nameIRegE(sz, pfx, modrm));
4135 break;
4137 case 1:
4138 *decode_OK = False;
4139 return delta;
4140 case 2: /* NOT */
4141 delta++;
4142 putIRegE(sz, pfx, modrm,
4143 unop(mkSizedOp(ty,Iop_Not8),
4144 getIRegE(sz, pfx, modrm)));
4145 DIP("not%c %s\n", nameISize(sz),
4146 nameIRegE(sz, pfx, modrm));
4147 break;
4148 case 3: /* NEG */
4149 delta++;
4150 dst0 = newTemp(ty);
4151 src = newTemp(ty);
4152 dst1 = newTemp(ty);
4153 assign(dst0, mkU(ty,0));
4154 assign(src, getIRegE(sz, pfx, modrm));
4155 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
4156 mkexpr(src)));
4157 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
4158 putIRegE(sz, pfx, modrm, mkexpr(dst1));
4159 DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm));
4160 break;
4161 case 4: /* MUL (unsigned widening) */
4162 delta++;
4163 src = newTemp(ty);
4164 assign(src, getIRegE(sz,pfx,modrm));
4165 codegen_mulL_A_D ( sz, False, src,
4166 nameIRegE(sz,pfx,modrm) );
4167 break;
4168 case 5: /* IMUL (signed widening) */
4169 delta++;
4170 src = newTemp(ty);
4171 assign(src, getIRegE(sz,pfx,modrm));
4172 codegen_mulL_A_D ( sz, True, src,
4173 nameIRegE(sz,pfx,modrm) );
4174 break;
4175 case 6: /* DIV */
4176 delta++;
4177 assign( t1, getIRegE(sz, pfx, modrm) );
4178 codegen_div ( sz, t1, False );
4179 DIP("div%c %s\n", nameISize(sz),
4180 nameIRegE(sz, pfx, modrm));
4181 break;
4182 case 7: /* IDIV */
4183 delta++;
4184 assign( t1, getIRegE(sz, pfx, modrm) );
4185 codegen_div ( sz, t1, True );
4186 DIP("idiv%c %s\n", nameISize(sz),
4187 nameIRegE(sz, pfx, modrm));
4188 break;
4189 default:
4190 /*NOTREACHED*/
4191 vpanic("Grp3(amd64,R)");
4193 } else {
4194 /* Decide if F2/XACQ or F3/XREL might be valid. */
4195 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4196 if ((gregLO3ofRM(modrm) == 3/*NEG*/ || gregLO3ofRM(modrm) == 2/*NOT*/)
4197 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4198 validF2orF3 = True;
4200 if (!validF2orF3) goto unhandled;
4201 /* */
4202 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
4203 /* we have to inform disAMode of any immediate
4204 bytes used */
4205 gregLO3ofRM(modrm)==0/*TEST*/
4206 ? imin(4,sz)
4209 t1 = newTemp(ty);
4210 delta += len;
4211 assign(t1, loadLE(ty,mkexpr(addr)));
4212 switch (gregLO3ofRM(modrm)) {
4213 case 0: { /* TEST */
4214 d64 = getSDisp(imin(4,sz), delta);
4215 delta += imin(4,sz);
4216 dst1 = newTemp(ty);
4217 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
4218 mkexpr(t1),
4219 mkU(ty, d64 & mkSizeMask(sz))));
4220 setFlags_DEP1( Iop_And8, dst1, ty );
4221 DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf);
4222 break;
4224 case 1:
4225 *decode_OK = False;
4226 return delta;
4227 case 2: /* NOT */
4228 dst1 = newTemp(ty);
4229 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
4230 if (haveLOCK(pfx)) {
4231 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
4232 guest_RIP_curr_instr );
4233 } else {
4234 storeLE( mkexpr(addr), mkexpr(dst1) );
4236 DIP("not%c %s\n", nameISize(sz), dis_buf);
4237 break;
4238 case 3: /* NEG */
4239 dst0 = newTemp(ty);
4240 src = newTemp(ty);
4241 dst1 = newTemp(ty);
4242 assign(dst0, mkU(ty,0));
4243 assign(src, mkexpr(t1));
4244 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
4245 mkexpr(src)));
4246 if (haveLOCK(pfx)) {
4247 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
4248 guest_RIP_curr_instr );
4249 } else {
4250 storeLE( mkexpr(addr), mkexpr(dst1) );
4252 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
4253 DIP("neg%c %s\n", nameISize(sz), dis_buf);
4254 break;
4255 case 4: /* MUL (unsigned widening) */
4256 codegen_mulL_A_D ( sz, False, t1, dis_buf );
4257 break;
4258 case 5: /* IMUL */
4259 codegen_mulL_A_D ( sz, True, t1, dis_buf );
4260 break;
4261 case 6: /* DIV */
4262 codegen_div ( sz, t1, False );
4263 DIP("div%c %s\n", nameISize(sz), dis_buf);
4264 break;
4265 case 7: /* IDIV */
4266 codegen_div ( sz, t1, True );
4267 DIP("idiv%c %s\n", nameISize(sz), dis_buf);
4268 break;
4269 default:
4270 /*NOTREACHED*/
4271 vpanic("Grp3(amd64,M)");
4274 return delta;
4275 unhandled:
4276 *decode_OK = False;
4277 return delta;
4281 /* Group 4 extended opcodes. We have to decide here whether F2 and F3
4282 might be valid. */
4283 static
4284 ULong dis_Grp4 ( const VexAbiInfo* vbi,
4285 Prefix pfx, Long delta, Bool* decode_OK )
4287 Int alen;
4288 UChar modrm;
4289 HChar dis_buf[50];
4290 IRType ty = Ity_I8;
4291 IRTemp t1 = newTemp(ty);
4292 IRTemp t2 = newTemp(ty);
4294 *decode_OK = True;
4296 modrm = getUChar(delta);
4297 if (epartIsReg(modrm)) {
4298 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4299 if (haveF2orF3(pfx)) goto unhandled;
4300 assign(t1, getIRegE(1, pfx, modrm));
4301 switch (gregLO3ofRM(modrm)) {
4302 case 0: /* INC */
4303 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
4304 putIRegE(1, pfx, modrm, mkexpr(t2));
4305 setFlags_INC_DEC( True, t2, ty );
4306 break;
4307 case 1: /* DEC */
4308 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
4309 putIRegE(1, pfx, modrm, mkexpr(t2));
4310 setFlags_INC_DEC( False, t2, ty );
4311 break;
4312 default:
4313 *decode_OK = False;
4314 return delta;
4316 delta++;
4317 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)),
4318 nameIRegE(1, pfx, modrm));
4319 } else {
4320 /* Decide if F2/XACQ or F3/XREL might be valid. */
4321 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4322 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/)
4323 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4324 validF2orF3 = True;
4326 if (!validF2orF3) goto unhandled;
4327 /* */
4328 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
4329 assign( t1, loadLE(ty, mkexpr(addr)) );
4330 switch (gregLO3ofRM(modrm)) {
4331 case 0: /* INC */
4332 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
4333 if (haveLOCK(pfx)) {
4334 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
4335 guest_RIP_curr_instr );
4336 } else {
4337 storeLE( mkexpr(addr), mkexpr(t2) );
4339 setFlags_INC_DEC( True, t2, ty );
4340 break;
4341 case 1: /* DEC */
4342 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
4343 if (haveLOCK(pfx)) {
4344 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
4345 guest_RIP_curr_instr );
4346 } else {
4347 storeLE( mkexpr(addr), mkexpr(t2) );
4349 setFlags_INC_DEC( False, t2, ty );
4350 break;
4351 default:
4352 *decode_OK = False;
4353 return delta;
4355 delta += alen;
4356 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf);
4358 return delta;
4359 unhandled:
4360 *decode_OK = False;
4361 return delta;
4365 /* Group 5 extended opcodes. We have to decide here whether F2 and F3
4366 might be valid. */
4367 static
4368 ULong dis_Grp5 ( const VexAbiInfo* vbi,
4369 Prefix pfx, Int sz, Long delta,
4370 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
4372 Int len;
4373 UChar modrm;
4374 HChar dis_buf[50];
4375 IRTemp addr = IRTemp_INVALID;
4376 IRType ty = szToITy(sz);
4377 IRTemp t1 = newTemp(ty);
4378 IRTemp t2 = IRTemp_INVALID;
4379 IRTemp t3 = IRTemp_INVALID;
4380 Bool showSz = True;
4382 *decode_OK = True;
4384 modrm = getUChar(delta);
4385 if (epartIsReg(modrm)) {
4386 /* F2/XACQ and F3/XREL are always invalid in the non-mem case.
4387 F2/CALL and F2/JMP may have bnd prefix. */
4388 if (haveF2orF3(pfx)
4389 && ! (haveF2(pfx)
4390 && (gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4)))
4391 goto unhandledR;
4392 assign(t1, getIRegE(sz,pfx,modrm));
4393 switch (gregLO3ofRM(modrm)) {
4394 case 0: /* INC */
4395 t2 = newTemp(ty);
4396 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
4397 mkexpr(t1), mkU(ty,1)));
4398 setFlags_INC_DEC( True, t2, ty );
4399 putIRegE(sz,pfx,modrm, mkexpr(t2));
4400 break;
4401 case 1: /* DEC */
4402 t2 = newTemp(ty);
4403 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
4404 mkexpr(t1), mkU(ty,1)));
4405 setFlags_INC_DEC( False, t2, ty );
4406 putIRegE(sz,pfx,modrm, mkexpr(t2));
4407 break;
4408 case 2: /* call Ev */
4409 /* Ignore any sz value and operate as if sz==8. */
4410 if (!(sz == 4 || sz == 8)) goto unhandledR;
4411 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4412 sz = 8;
4413 t3 = newTemp(Ity_I64);
4414 assign(t3, getIRegE(sz,pfx,modrm));
4415 t2 = newTemp(Ity_I64);
4416 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
4417 putIReg64(R_RSP, mkexpr(t2));
4418 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1));
4419 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
4420 jmp_treg(dres, Ijk_Call, t3);
4421 vassert(dres->whatNext == Dis_StopHere);
4422 showSz = False;
4423 break;
4424 case 4: /* jmp Ev */
4425 /* Ignore any sz value and operate as if sz==8. */
4426 if (!(sz == 4 || sz == 8)) goto unhandledR;
4427 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4428 sz = 8;
4429 t3 = newTemp(Ity_I64);
4430 assign(t3, getIRegE(sz,pfx,modrm));
4431 jmp_treg(dres, Ijk_Boring, t3);
4432 vassert(dres->whatNext == Dis_StopHere);
4433 showSz = False;
4434 break;
4435 case 6: /* PUSH Ev */
4436 /* There is no encoding for 32-bit operand size; hence ... */
4437 if (sz == 4) sz = 8;
4438 if (sz == 8 || sz == 2) {
4439 ty = szToITy(sz); /* redo it, since sz might have changed */
4440 t3 = newTemp(ty);
4441 assign(t3, getIRegE(sz,pfx,modrm));
4442 t2 = newTemp(Ity_I64);
4443 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
4444 putIReg64(R_RSP, mkexpr(t2) );
4445 storeLE( mkexpr(t2), mkexpr(t3) );
4446 break;
4447 } else {
4448 goto unhandledR; /* awaiting test case */
4450 default:
4451 unhandledR:
4452 *decode_OK = False;
4453 return delta;
4455 delta++;
4456 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
4457 showSz ? nameISize(sz) : ' ',
4458 nameIRegE(sz, pfx, modrm));
4459 } else {
4460 /* Decide if F2/XACQ, F3/XREL, F2/CALL or F2/JMP might be valid. */
4461 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4462 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/)
4463 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4464 validF2orF3 = True;
4465 } else if ((gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4)
4466 && (haveF2(pfx) && !haveF3(pfx))) {
4467 validF2orF3 = True;
4469 if (!validF2orF3) goto unhandledM;
4470 /* */
4471 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
4472 if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4
4473 && gregLO3ofRM(modrm) != 6) {
4474 assign(t1, loadLE(ty,mkexpr(addr)));
4476 switch (gregLO3ofRM(modrm)) {
4477 case 0: /* INC */
4478 t2 = newTemp(ty);
4479 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
4480 mkexpr(t1), mkU(ty,1)));
4481 if (haveLOCK(pfx)) {
4482 casLE( mkexpr(addr),
4483 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4484 } else {
4485 storeLE(mkexpr(addr),mkexpr(t2));
4487 setFlags_INC_DEC( True, t2, ty );
4488 break;
4489 case 1: /* DEC */
4490 t2 = newTemp(ty);
4491 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
4492 mkexpr(t1), mkU(ty,1)));
4493 if (haveLOCK(pfx)) {
4494 casLE( mkexpr(addr),
4495 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4496 } else {
4497 storeLE(mkexpr(addr),mkexpr(t2));
4499 setFlags_INC_DEC( False, t2, ty );
4500 break;
4501 case 2: /* call Ev */
4502 /* Ignore any sz value and operate as if sz==8. */
4503 if (!(sz == 4 || sz == 8)) goto unhandledM;
4504 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4505 sz = 8;
4506 t3 = newTemp(Ity_I64);
4507 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4508 t2 = newTemp(Ity_I64);
4509 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
4510 putIReg64(R_RSP, mkexpr(t2));
4511 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len));
4512 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
4513 jmp_treg(dres, Ijk_Call, t3);
4514 vassert(dres->whatNext == Dis_StopHere);
4515 showSz = False;
4516 break;
4517 case 4: /* JMP Ev */
4518 /* Ignore any sz value and operate as if sz==8. */
4519 if (!(sz == 4 || sz == 8)) goto unhandledM;
4520 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4521 sz = 8;
4522 t3 = newTemp(Ity_I64);
4523 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4524 jmp_treg(dres, Ijk_Boring, t3);
4525 vassert(dres->whatNext == Dis_StopHere);
4526 showSz = False;
4527 break;
4528 case 6: /* PUSH Ev */
4529 /* There is no encoding for 32-bit operand size; hence ... */
4530 if (sz == 4) sz = 8;
4531 if (sz == 8 || sz == 2) {
4532 ty = szToITy(sz); /* redo it, since sz might have changed */
4533 t3 = newTemp(ty);
4534 assign(t3, loadLE(ty,mkexpr(addr)));
4535 t2 = newTemp(Ity_I64);
4536 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
4537 putIReg64(R_RSP, mkexpr(t2) );
4538 storeLE( mkexpr(t2), mkexpr(t3) );
4539 break;
4540 } else {
4541 goto unhandledM; /* awaiting test case */
4543 default:
4544 unhandledM:
4545 *decode_OK = False;
4546 return delta;
4548 delta += len;
4549 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
4550 showSz ? nameISize(sz) : ' ',
4551 dis_buf);
4553 return delta;
4557 /*------------------------------------------------------------*/
4558 /*--- Disassembling string ops (including REP prefixes) ---*/
4559 /*------------------------------------------------------------*/
4561 /* Code shared by all the string ops */
4562 static
4563 void dis_string_op_increment ( Int sz, IRTemp t_inc )
4565 UChar logSz;
4566 if (sz == 8 || sz == 4 || sz == 2) {
4567 logSz = 1;
4568 if (sz == 4) logSz = 2;
4569 if (sz == 8) logSz = 3;
4570 assign( t_inc,
4571 binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
4572 mkU8(logSz) ) );
4573 } else {
4574 assign( t_inc,
4575 IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
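/* Illustrative sketch only, excluded from the build: the per-iteration
   pointer increment computed above, in plain C.  DFLAG is +1 or -1, so
   shifting it left by log2(sz) is just a multiply by the element size.
   The name ref_string_increment is invented for illustration. */
#if 0
static Long ref_string_increment ( Long dflag /* +1 or -1 */, Int sz )
{
   vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
   return dflag * sz;                  /* equivalent to dflag << logSz */
}
#endif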
4579 static
4580 void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ),
4581 Int sz, const HChar* name, Prefix pfx )
4583 IRTemp t_inc = newTemp(Ity_I64);
4584 /* Really we ought to inspect the override prefixes, but we don't.
4585 The following assertion catches any resulting silliness. */
4586 vassert(pfx == clearSegBits(pfx));
4587 dis_string_op_increment(sz, t_inc);
4588 dis_OP( sz, t_inc, pfx );
4589 DIP("%s%c\n", name, nameISize(sz));
4592 static
4593 void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx )
4595 IRType ty = szToITy(sz);
4596 IRTemp td = newTemp(Ity_I64); /* RDI */
4597 IRTemp ts = newTemp(Ity_I64); /* RSI */
4598 IRExpr *incd, *incs;
4600 if (haveASO(pfx)) {
4601 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4602 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4603 } else {
4604 assign( td, getIReg64(R_RDI) );
4605 assign( ts, getIReg64(R_RSI) );
4608 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
4610 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4611 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4612 if (haveASO(pfx)) {
4613 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4614 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4616 putIReg64( R_RDI, incd );
4617 putIReg64( R_RSI, incs );
4620 static
4621 void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx )
4623 IRType ty = szToITy(sz);
4624 IRTemp ts = newTemp(Ity_I64); /* RSI */
4625 IRExpr *incs;
4627 if (haveASO(pfx))
4628 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4629 else
4630 assign( ts, getIReg64(R_RSI) );
4632 putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );
4634 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4635 if (haveASO(pfx))
4636 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4637 putIReg64( R_RSI, incs );
4640 static
4641 void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx )
4643 IRType ty = szToITy(sz);
4644 IRTemp ta = newTemp(ty); /* rAX */
4645 IRTemp td = newTemp(Ity_I64); /* RDI */
4646 IRExpr *incd;
4648 assign( ta, getIRegRAX(sz) );
4650 if (haveASO(pfx))
4651 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4652 else
4653 assign( td, getIReg64(R_RDI) );
4655 storeLE( mkexpr(td), mkexpr(ta) );
4657 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4658 if (haveASO(pfx))
4659 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4660 putIReg64( R_RDI, incd );
4663 static
4664 void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx )
4666 IRType ty = szToITy(sz);
4667 IRTemp tdv = newTemp(ty); /* (RDI) */
4668 IRTemp tsv = newTemp(ty); /* (RSI) */
4669 IRTemp td = newTemp(Ity_I64); /* RDI */
4670 IRTemp ts = newTemp(Ity_I64); /* RSI */
4671 IRExpr *incd, *incs;
4673 if (haveASO(pfx)) {
4674 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4675 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4676 } else {
4677 assign( td, getIReg64(R_RDI) );
4678 assign( ts, getIReg64(R_RSI) );
4681 assign( tdv, loadLE(ty,mkexpr(td)) );
4683 assign( tsv, loadLE(ty,mkexpr(ts)) );
4685 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
4687 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4688 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4689 if (haveASO(pfx)) {
4690 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4691 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4693 putIReg64( R_RDI, incd );
4694 putIReg64( R_RSI, incs );
4697 static
4698 void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx )
4700 IRType ty = szToITy(sz);
4701 IRTemp ta = newTemp(ty); /* rAX */
4702 IRTemp td = newTemp(Ity_I64); /* RDI */
4703 IRTemp tdv = newTemp(ty); /* (RDI) */
4704 IRExpr *incd;
4706 assign( ta, getIRegRAX(sz) );
4708 if (haveASO(pfx))
4709 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4710 else
4711 assign( td, getIReg64(R_RDI) );
4713 assign( tdv, loadLE(ty,mkexpr(td)) );
4715 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
4717 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4718 if (haveASO(pfx))
4719 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4720 putIReg64( R_RDI, incd );
4724 /* Wrap the appropriate string op inside a REP/REPE/REPNE. We assume
4725 the insn is the last one in the basic block, and so emit a jump to
4726 the next insn, rather than just falling through. */
4727 static
4728 void dis_REP_op ( /*MOD*/DisResult* dres,
4729 AMD64Condcode cond,
4730 void (*dis_OP)(Int, IRTemp, Prefix),
4731 Int sz, Addr64 rip, Addr64 rip_next, const HChar* name,
4732 Prefix pfx )
4734 IRTemp t_inc = newTemp(Ity_I64);
4735 IRTemp tc;
4736 IRExpr* cmp;
4738 /* Really we ought to inspect the override prefixes, but we don't.
4739 The following assertion catches any resulting silliness. */
4740 vassert(pfx == clearSegBits(pfx));
4742 if (haveASO(pfx)) {
4743 tc = newTemp(Ity_I32); /* ECX */
4744 assign( tc, getIReg32(R_RCX) );
4745 cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0));
4746 } else {
4747 tc = newTemp(Ity_I64); /* RCX */
4748 assign( tc, getIReg64(R_RCX) );
4749 cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0));
4752 stmt( IRStmt_Exit( cmp, Ijk_Boring,
4753 IRConst_U64(rip_next), OFFB_RIP ) );
4755 if (haveASO(pfx))
4756 putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
4757 else
4758 putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );
4760 dis_string_op_increment(sz, t_inc);
4761 dis_OP (sz, t_inc, pfx);
4763 if (cond == AMD64CondAlways) {
4764 jmp_lit(dres, Ijk_Boring, rip);
4765 vassert(dres->whatNext == Dis_StopHere);
4766 } else {
4767 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
4768 Ijk_Boring,
4769 IRConst_U64(rip),
4770 OFFB_RIP ) );
4771 jmp_lit(dres, Ijk_Boring, rip_next);
4772 vassert(dres->whatNext == Dis_StopHere);
4774 DIP("%s%c\n", name, nameISize(sz));
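/* Illustrative sketch only, excluded from the build: the control flow
   emitted above for a REP-prefixed string op, written as the equivalent
   C loop.  do_one_op stands for whichever dis_* body is being wrapped
   and cond_holds for the REPE/REPNE condition (always true for plain
   REP); both names are invented for illustration. */
#if 0
static void ref_rep_loop ( ULong* rcx,
                           void (*do_one_op)(void),
                           Bool (*cond_holds)(void) )
{
   while (*rcx != 0) {            /* RCX == 0: fall through to the next insn */
      *rcx -= 1;                  /* the count is decremented before the op  */
      do_one_op();                /* one MOVS/STOS/CMPS/SCAS/LODS iteration  */
      if (!cond_holds()) break;   /* REPE/REPNE test happens after the op    */
   }
}
#endif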
4778 /*------------------------------------------------------------*/
4779 /*--- Arithmetic, etc. ---*/
4780 /*------------------------------------------------------------*/
4782 /* IMUL E, G. Supplied rip points to the modR/M byte. */
4783 static
4784 ULong dis_mul_E_G ( const VexAbiInfo* vbi,
4785 Prefix pfx,
4786 Int size,
4787 Long delta0 )
4789 Int alen;
4790 HChar dis_buf[50];
4791 UChar rm = getUChar(delta0);
4792 IRType ty = szToITy(size);
4793 IRTemp te = newTemp(ty);
4794 IRTemp tg = newTemp(ty);
4795 IRTemp resLo = newTemp(ty);
4797 assign( tg, getIRegG(size, pfx, rm) );
4798 if (epartIsReg(rm)) {
4799 assign( te, getIRegE(size, pfx, rm) );
4800 } else {
4801 IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 );
4802 assign( te, loadLE(ty,mkexpr(addr)) );
4805 setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB );
4807 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
4809 putIRegG(size, pfx, rm, mkexpr(resLo) );
4811 if (epartIsReg(rm)) {
4812 DIP("imul%c %s, %s\n", nameISize(size),
4813 nameIRegE(size,pfx,rm),
4814 nameIRegG(size,pfx,rm));
4815 return 1+delta0;
4816 } else {
4817 DIP("imul%c %s, %s\n", nameISize(size),
4818 dis_buf,
4819 nameIRegG(size,pfx,rm));
4820 return alen+delta0;
4825 /* IMUL I * E -> G. Supplied rip points to the modR/M byte. */
4826 static
4827 ULong dis_imul_I_E_G ( const VexAbiInfo* vbi,
4828 Prefix pfx,
4829 Int size,
4830 Long delta,
4831 Int litsize )
4833 Long d64;
4834 Int alen;
4835 HChar dis_buf[50];
4836 UChar rm = getUChar(delta);
4837 IRType ty = szToITy(size);
4838 IRTemp te = newTemp(ty);
4839 IRTemp tl = newTemp(ty);
4840 IRTemp resLo = newTemp(ty);
4842 vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8);
4844 if (epartIsReg(rm)) {
4845 assign(te, getIRegE(size, pfx, rm));
4846 delta++;
4847 } else {
4848 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
4849 imin(4,litsize) );
4850 assign(te, loadLE(ty, mkexpr(addr)));
4851 delta += alen;
4853 d64 = getSDisp(imin(4,litsize),delta);
4854 delta += imin(4,litsize);
4856 d64 &= mkSizeMask(size);
4857 assign(tl, mkU(ty,d64));
4859 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));
4861 setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB );
4863 putIRegG(size, pfx, rm, mkexpr(resLo));
4865 DIP("imul%c $%lld, %s, %s\n",
4866 nameISize(size), d64,
4867 ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ),
4868 nameIRegG(size,pfx,rm) );
4869 return delta;
4873 /* Generate an IR sequence to do a popcount operation on the supplied
4874 IRTemp, and return a new IRTemp holding the result. 'ty' may be
4875 Ity_I16, Ity_I32 or Ity_I64 only. */
4876 static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src )
4878 Int i;
4879 if (ty == Ity_I16) {
4880 IRTemp old = IRTemp_INVALID;
4881 IRTemp nyu = IRTemp_INVALID;
4882 IRTemp mask[4], shift[4];
4883 for (i = 0; i < 4; i++) {
4884 mask[i] = newTemp(ty);
4885 shift[i] = 1 << i;
4887 assign(mask[0], mkU16(0x5555));
4888 assign(mask[1], mkU16(0x3333));
4889 assign(mask[2], mkU16(0x0F0F));
4890 assign(mask[3], mkU16(0x00FF));
4891 old = src;
4892 for (i = 0; i < 4; i++) {
4893 nyu = newTemp(ty);
4894 assign(nyu,
4895 binop(Iop_Add16,
4896 binop(Iop_And16,
4897 mkexpr(old),
4898 mkexpr(mask[i])),
4899 binop(Iop_And16,
4900 binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])),
4901 mkexpr(mask[i]))));
4902 old = nyu;
4904 return nyu;
4906 if (ty == Ity_I32) {
4907 IRTemp old = IRTemp_INVALID;
4908 IRTemp nyu = IRTemp_INVALID;
4909 IRTemp mask[5], shift[5];
4910 for (i = 0; i < 5; i++) {
4911 mask[i] = newTemp(ty);
4912 shift[i] = 1 << i;
4914 assign(mask[0], mkU32(0x55555555));
4915 assign(mask[1], mkU32(0x33333333));
4916 assign(mask[2], mkU32(0x0F0F0F0F));
4917 assign(mask[3], mkU32(0x00FF00FF));
4918 assign(mask[4], mkU32(0x0000FFFF));
4919 old = src;
4920 for (i = 0; i < 5; i++) {
4921 nyu = newTemp(ty);
4922 assign(nyu,
4923 binop(Iop_Add32,
4924 binop(Iop_And32,
4925 mkexpr(old),
4926 mkexpr(mask[i])),
4927 binop(Iop_And32,
4928 binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
4929 mkexpr(mask[i]))));
4930 old = nyu;
4932 return nyu;
4934 if (ty == Ity_I64) {
4935 IRTemp old = IRTemp_INVALID;
4936 IRTemp nyu = IRTemp_INVALID;
4937 IRTemp mask[6], shift[6];
4938 for (i = 0; i < 6; i++) {
4939 mask[i] = newTemp(ty);
4940 shift[i] = 1 << i;
4942 assign(mask[0], mkU64(0x5555555555555555ULL));
4943 assign(mask[1], mkU64(0x3333333333333333ULL));
4944 assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL));
4945 assign(mask[3], mkU64(0x00FF00FF00FF00FFULL));
4946 assign(mask[4], mkU64(0x0000FFFF0000FFFFULL));
4947 assign(mask[5], mkU64(0x00000000FFFFFFFFULL));
4948 old = src;
4949 for (i = 0; i < 6; i++) {
4950 nyu = newTemp(ty);
4951 assign(nyu,
4952 binop(Iop_Add64,
4953 binop(Iop_And64,
4954 mkexpr(old),
4955 mkexpr(mask[i])),
4956 binop(Iop_And64,
4957 binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])),
4958 mkexpr(mask[i]))));
4959 old = nyu;
4961 return nyu;
4963 /*NOTREACHED*/
4964 vassert(0);
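/* Illustrative sketch only, excluded from the build: the 32-bit
   mask-and-add reduction generated above, written directly in C.  The
   name ref_popcount32 is invented for illustration. */
#if 0
static UInt ref_popcount32 ( UInt x )
{
   x = (x & 0x55555555u) + ((x >> 1)  & 0x55555555u);   /* 2-bit sums  */
   x = (x & 0x33333333u) + ((x >> 2)  & 0x33333333u);   /* 4-bit sums  */
   x = (x & 0x0F0F0F0Fu) + ((x >> 4)  & 0x0F0F0F0Fu);   /* 8-bit sums  */
   x = (x & 0x00FF00FFu) + ((x >> 8)  & 0x00FF00FFu);   /* 16-bit sums */
   x = (x & 0x0000FFFFu) + ((x >> 16) & 0x0000FFFFu);   /* final total */
   return x;
}
#endif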
4968 /* Generate an IR sequence to do a count-leading-zeroes operation on
4969 the supplied IRTemp, and return a new IRTemp holding the result.
4970 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
4971 the argument is zero, return the number of bits in the word (the
4972 natural semantics). */
4973 static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
4975 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
4977 IRTemp src64 = newTemp(Ity_I64);
4978 assign(src64, widenUto64( mkexpr(src) ));
4980 IRTemp src64x = newTemp(Ity_I64);
4981 assign(src64x,
4982 binop(Iop_Shl64, mkexpr(src64),
4983 mkU8(64 - 8 * sizeofIRType(ty))));
4985 // Clz64 has undefined semantics when its input is zero, so
4986 // special-case around that.
4987 IRTemp res64 = newTemp(Ity_I64);
4988 assign(res64,
4989 IRExpr_ITE(
4990 binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0)),
4991 mkU64(8 * sizeofIRType(ty)),
4992 unop(Iop_Clz64, mkexpr(src64x))
4995 IRTemp res = newTemp(ty);
4996 assign(res, narrowTo(ty, mkexpr(res64)));
4997 return res;
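/* Illustrative sketch only, excluded from the build: LZCNT of a 16-bit
   value done the same way -- left-justify it in 64 bits so a 64-bit
   count-leading-zeroes gives the answer, with the all-zeroes input
   special-cased to the operand width.  Assumes a GCC/Clang-style
   __builtin_clzll; the name ref_lzcnt16 is invented for illustration. */
#if 0
static UInt ref_lzcnt16 ( UShort x )
{
   ULong wide = ((ULong)x) << (64 - 16);   /* left-justify */
   if (wide == 0) return 16;               /* clz of zero is undefined */
   return (UInt)__builtin_clzll(wide);
}
#endif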
5001 /* Generate an IR sequence to do a count-trailing-zeroes operation on
5002 the supplied IRTemp, and return a new IRTemp holding the result.
5003 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
5004 the argument is zero, return the number of bits in the word (the
5005 natural semantics). */
5006 static IRTemp gen_TZCNT ( IRType ty, IRTemp src )
5008 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
5010 IRTemp src64 = newTemp(Ity_I64);
5011 assign(src64, widenUto64( mkexpr(src) ));
5013 // Ctz64 has undefined semantics when its input is zero, so
5014 // special-case around that.
5015 IRTemp res64 = newTemp(Ity_I64);
5016 assign(res64,
5017 IRExpr_ITE(
5018 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0)),
5019 mkU64(8 * sizeofIRType(ty)),
5020 unop(Iop_Ctz64, mkexpr(src64))
5023 IRTemp res = newTemp(ty);
5024 assign(res, narrowTo(ty, mkexpr(res64)));
5025 return res;
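/* Illustrative sketch only, excluded from the build: TZCNT of a 16-bit
   value -- no justification is needed, only the all-zeroes input needs
   the special case.  Assumes a GCC/Clang-style __builtin_ctzll; the
   name ref_tzcnt16 is invented for illustration. */
#if 0
static UInt ref_tzcnt16 ( UShort x )
{
   if (x == 0) return 16;                  /* ctz of zero is undefined */
   return (UInt)__builtin_ctzll((ULong)x);
}
#endif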
5029 /*------------------------------------------------------------*/
5030 /*--- ---*/
5031 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/
5032 /*--- ---*/
5033 /*------------------------------------------------------------*/
5035 /* --- Helper functions for dealing with the register stack. --- */
5037 /* --- Set the emulation-warning pseudo-register. --- */
5039 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
5041 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
5042 stmt( IRStmt_Put( OFFB_EMNOTE, e ) );
5045 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
5047 static IRExpr* mkQNaN64 ( void )
5049 /* QNaN is 0 2047 1 0(51times)
5050 == 0b 11111111111b 1 0(51times)
5051 == 0x7FF8 0000 0000 0000 */
5053 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
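/* Illustrative check only, excluded from the build: reinterpreting the
   constant above as a double does give a NaN (sign 0, exponent all
   ones, top mantissa bit set, hence quiet).  The name ref_is_qnan64 is
   invented for illustration. */
#if 0
static Bool ref_is_qnan64 ( void )
{
   union { ULong i; Double d; } u;
   u.i = 0x7FF8000000000000ULL;
   return toBool(u.d != u.d);   /* only a NaN compares unequal to itself */
}
#endif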
5056 /* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */
5058 static IRExpr* get_ftop ( void )
5060 return IRExpr_Get( OFFB_FTOP, Ity_I32 );
5063 static void put_ftop ( IRExpr* e )
5065 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
5066 stmt( IRStmt_Put( OFFB_FTOP, e ) );
5069 /* --------- Get/put the C3210 bits. --------- */
5071 static IRExpr* /* :: Ity_I64 */ get_C3210 ( void )
5073 return IRExpr_Get( OFFB_FC3210, Ity_I64 );
5076 static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ )
5078 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
5079 stmt( IRStmt_Put( OFFB_FC3210, e ) );
5082 /* --------- Get/put the FPU rounding mode. --------- */
5083 static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
5085 return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 ));
5088 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
5090 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
5091 stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) );
5095 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
5096 /* Produces a value in 0 .. 3, which is encoded as per the type
5097 IRRoundingMode. Since the guest_FPROUND value is also encoded as
5098 per IRRoundingMode, we merely need to get it and mask it for
5099 safety. */
5101 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
5103 return binop( Iop_And32, get_fpround(), mkU32(3) );
5106 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
5108 return mkU32(Irrm_NEAREST);
5112 /* --------- Get/set FP register tag bytes. --------- */
5114 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
5116 static void put_ST_TAG ( Int i, IRExpr* value )
5118 IRRegArray* descr;
5119 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
5120 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5121 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
5124 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
5125 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
5127 static IRExpr* get_ST_TAG ( Int i )
5129 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5130 return IRExpr_GetI( descr, get_ftop(), i );
5134 /* --------- Get/set FP registers. --------- */
5136 /* Given i, and some expression e, emit 'ST(i) = e' and set the
5137 register's tag to indicate the register is full. The previous
5138 state of the register is not checked. */
5140 static void put_ST_UNCHECKED ( Int i, IRExpr* value )
5142 IRRegArray* descr;
5143 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
5144 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
5145 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
5146 /* Mark the register as in-use. */
5147 put_ST_TAG(i, mkU8(1));
5150 /* Given i, and some expression e, emit
5151 ST(i) = is_full(i) ? NaN : e
5152 and set the tag accordingly. */
5155 static void put_ST ( Int i, IRExpr* value )
5157 put_ST_UNCHECKED(
5159 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
5160 /* non-0 means full */
5161 mkQNaN64(),
5162 /* 0 means empty */
5163 value
5169 /* Given i, generate an expression yielding 'ST(i)'. */
5171 static IRExpr* get_ST_UNCHECKED ( Int i )
5173 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
5174 return IRExpr_GetI( descr, get_ftop(), i );
5178 /* Given i, generate an expression yielding
5179 is_full(i) ? ST(i) : NaN */
5182 static IRExpr* get_ST ( Int i )
5184 return
5185 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
5186 /* non-0 means full */
5187 get_ST_UNCHECKED(i),
5188 /* 0 means empty */
5189 mkQNaN64());
5193 /* Given i, and some expression e, and a condition cond, generate IR
5194 which has the same effect as put_ST(i,e) when cond is true and has
5195 no effect when cond is false. Given the lack of proper
5196 if-then-else in the IR, this is pretty tricky. */
5199 static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value )
5201 // new_tag = if cond then FULL else old_tag
5202 // new_val = if cond then (if old_tag==FULL then NaN else val)
5203 // else old_val
5205 IRTemp old_tag = newTemp(Ity_I8);
5206 assign(old_tag, get_ST_TAG(i));
5207 IRTemp new_tag = newTemp(Ity_I8);
5208 assign(new_tag,
5209 IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag)));
5211 IRTemp old_val = newTemp(Ity_F64);
5212 assign(old_val, get_ST_UNCHECKED(i));
5213 IRTemp new_val = newTemp(Ity_F64);
5214 assign(new_val,
5215 IRExpr_ITE(mkexpr(cond),
5216 IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)),
5217 /* non-0 means full */
5218 mkQNaN64(),
5219 /* 0 means empty */
5220 value),
5221 mkexpr(old_val)));
5223 put_ST_UNCHECKED(i, mkexpr(new_val));
5224 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So
5225 // now set it to new_tag instead.
5226 put_ST_TAG(i, mkexpr(new_tag));
5229 /* Adjust FTOP downwards by one register. */
5231 static void fp_push ( void )
5233 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
5236 /* Adjust FTOP downwards by one register when COND is 1:I1. Else
5237 don't change it. */
5239 static void maybe_fp_push ( IRTemp cond )
5241 put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) );
5244 /* Adjust FTOP upwards by one register, and mark the vacated register
5245 as empty. */
5247 static void fp_pop ( void )
5249 put_ST_TAG(0, mkU8(0));
5250 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
5253 /* Set the C2 bit of the FPU status register to e[0]. Assumes that
5254 e[31:1] == 0. */
5256 static void set_C2 ( IRExpr* e )
5258 IRExpr* cleared = binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2));
5259 put_C3210( binop(Iop_Or64,
5260 cleared,
5261 binop(Iop_Shl64, e, mkU8(AMD64G_FC_SHIFT_C2))) );
5264 /* Generate code to check that abs(d64) < 2^63 and is finite. This is
5265 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The
5266 test is simple, but the derivation of it is not so simple.
5268 The exponent field for an IEEE754 double is 11 bits. That means it
5269 can take values 0 through 0x7FF. If the exponent has value 0x7FF,
5270 the number is either a NaN or an Infinity and so is not finite.
5271 Furthermore, a finite value of exactly 2^63 is the smallest value
5272 that has exponent value 0x43E. Hence, what we need to do is
5273 extract the exponent, ignoring the sign bit and mantissa, and check
5274 it is < 0x43E, or <= 0x43D.
5276 To make this easily applicable to 32- and 64-bit targets, a
5277 roundabout approach is used. First the number is converted to I64,
5278 then the top 32 bits are taken. Shifting them right by 20 bits
5279 places the sign bit and exponent in the bottom 12 bits. Anding
5280 with 0x7FF gets rid of the sign bit, leaving just the exponent
5281 available for comparison. */
5283 static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 )
5285 IRTemp i64 = newTemp(Ity_I64);
5286 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) );
5287 IRTemp exponent = newTemp(Ity_I32);
5288 assign(exponent,
5289 binop(Iop_And32,
5290 binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)),
5291 mkU32(0x7FF)));
5292 IRTemp in_range_and_finite = newTemp(Ity_I1);
5293 assign(in_range_and_finite,
5294 binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D)));
5295 return in_range_and_finite;
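/* Illustrative sketch only, excluded from the build: the same test
   applied to a plain double -- pull out the 11-bit exponent and require
   it to be <= 0x43D, which rules out NaNs, infinities and any magnitude
   >= 2^63.  The name ref_trig_arg_ok is invented for illustration. */
#if 0
static Bool ref_trig_arg_ok ( Double d )
{
   union { Double d; ULong i; } u;
   u.d = d;
   UInt exponent = (UInt)((u.i >> 52) & 0x7FF);   /* drop sign and mantissa */
   return toBool(exponent <= 0x43D);
}
#endif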
5298 /* Invent a plausible-looking FPU status word value:
5299 ((ftop & 7) << 11) | (c3210 & 0x4700) */
5301 static IRExpr* get_FPU_sw ( void )
5303 return
5304 unop(Iop_32to16,
5305 binop(Iop_Or32,
5306 binop(Iop_Shl32,
5307 binop(Iop_And32, get_ftop(), mkU32(7)),
5308 mkU8(11)),
5309 binop(Iop_And32, unop(Iop_64to32, get_C3210()),
5310 mkU32(0x4700))
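/* Illustrative sketch only, excluded from the build: the status-word
   formula above evaluated on plain integers.  The name ref_fpu_sw is
   invented for illustration. */
#if 0
static UShort ref_fpu_sw ( UInt ftop, ULong c3210 )
{
   return (UShort)( ((ftop & 7) << 11) | ((UInt)c3210 & 0x4700) );
}
#endif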
5315 /* Generate a dirty helper call that initialises the x87 state a la
5316 FINIT. If |guard| is NULL, it is done unconditionally. Otherwise
5317 |guard| is used as a guarding condition. */
5319 static void gen_FINIT_SEQUENCE ( IRExpr* guard )
5321 /* Uses dirty helper:
5322 void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* ) */
5323 IRDirty* d = unsafeIRDirty_0_N (
5324 0/*regparms*/,
5325 "amd64g_dirtyhelper_FINIT",
5326 &amd64g_dirtyhelper_FINIT,
5327 mkIRExprVec_1( IRExpr_GSPTR() )
5330 /* declare we're writing guest state */
5331 d->nFxState = 5;
5332 vex_bzero(&d->fxState, sizeof(d->fxState));
5334 d->fxState[0].fx = Ifx_Write;
5335 d->fxState[0].offset = OFFB_FTOP;
5336 d->fxState[0].size = sizeof(UInt);
5338 d->fxState[1].fx = Ifx_Write;
5339 d->fxState[1].offset = OFFB_FPREGS;
5340 d->fxState[1].size = 8 * sizeof(ULong);
5342 d->fxState[2].fx = Ifx_Write;
5343 d->fxState[2].offset = OFFB_FPTAGS;
5344 d->fxState[2].size = 8 * sizeof(UChar);
5346 d->fxState[3].fx = Ifx_Write;
5347 d->fxState[3].offset = OFFB_FPROUND;
5348 d->fxState[3].size = sizeof(ULong);
5350 d->fxState[4].fx = Ifx_Write;
5351 d->fxState[4].offset = OFFB_FC3210;
5352 d->fxState[4].size = sizeof(ULong);
5354 if (guard)
5355 d->guard = guard;
5357 stmt( IRStmt_Dirty(d) );
5361 /* ------------------------------------------------------- */
5362 /* Given all that stack-mangling junk, we can now go ahead
5363 and describe FP instructions. */
5366 /* ST(0) = ST(0) `op` mem64/32(addr)
5367 Need to check ST(0)'s tag on read, but not on write. */
5369 static
5370 void fp_do_op_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
5371 IROp op, Bool dbl )
5373 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
5374 if (dbl) {
5375 put_ST_UNCHECKED(0,
5376 triop( op,
5377 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5378 get_ST(0),
5379 loadLE(Ity_F64,mkexpr(addr))
5381 } else {
5382 put_ST_UNCHECKED(0,
5383 triop( op,
5384 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5385 get_ST(0),
5386 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
5392 /* ST(0) = mem64/32(addr) `op` ST(0)
5393 Need to check ST(0)'s tag on read, but not on write. */
5395 static
5396 void fp_do_oprev_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
5397 IROp op, Bool dbl )
5399 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
5400 if (dbl) {
5401 put_ST_UNCHECKED(0,
5402 triop( op,
5403 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5404 loadLE(Ity_F64,mkexpr(addr)),
5405 get_ST(0)
5407 } else {
5408 put_ST_UNCHECKED(0,
5409 triop( op,
5410 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5411 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
5412 get_ST(0)
5418 /* ST(dst) = ST(dst) `op` ST(src).
5419 Check dst and src tags when reading but not on write. */
5421 static
5422 void fp_do_op_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
5423 Bool pop_after )
5425 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
5426 put_ST_UNCHECKED(
5427 st_dst,
5428 triop( op,
5429 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5430 get_ST(st_dst),
5431 get_ST(st_src) )
5433 if (pop_after)
5434 fp_pop();
5437 /* ST(dst) = ST(src) `op` ST(dst).
5438 Check dst and src tags when reading but not on write. */
5440 static
5441 void fp_do_oprev_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
5442 Bool pop_after )
5444 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
5445 put_ST_UNCHECKED(
5446 st_dst,
5447 triop( op,
5448 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5449 get_ST(st_src),
5450 get_ST(st_dst) )
5452 if (pop_after)
5453 fp_pop();
5456 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */
5457 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
5459 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
5460 /* This is a bit of a hack (and isn't really right). It sets
5461 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
5462 documentation implies A and S are unchanged. */
5464 /* It's also fishy in that it is used both for COMIP and
5465 UCOMIP, and they aren't the same (although similar). */
5466 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
5467 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
5468 stmt( IRStmt_Put(
5469 OFFB_CC_DEP1,
5470 binop( Iop_And64,
5471 unop( Iop_32Uto64,
5472 binop(Iop_CmpF64, get_ST(0), get_ST(i))),
5473 mkU64(0x45)
5474 )));
5475 if (pop_after)
5476 fp_pop();
5480 /* returns
5481 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 ) */
5483 static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 )
5485 IRTemp t32 = newTemp(Ity_I32);
5486 assign( t32, e32 );
5487 return
5488 IRExpr_ITE(
5489 binop(Iop_CmpLT64U,
5490 unop(Iop_32Uto64,
5491 binop(Iop_Add32, mkexpr(t32), mkU32(32768))),
5492 mkU64(65536)),
5493 unop(Iop_32to16, mkexpr(t32)),
5494 mkU16( 0x8000 ) );
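/* Illustrative sketch only, excluded from the build: the saturating
   narrow above, including the unsigned-range trick (bias by 32768 and
   compare against 65536 rather than doing two signed comparisons).  The
   name ref_qnarrow_32_to_16 is invented for illustration. */
#if 0
static Short ref_qnarrow_32_to_16 ( Int e32 )
{
   UInt biased = (UInt)e32 + 32768u;   /* wraps, like Iop_Add32 */
   if (biased < 65536u)
      return (Short)e32;               /* in range: keep the value */
   return -32768;                      /* out of range: 0x8000, x87-style */
}
#endif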
5498 static
5499 ULong dis_FPU ( /*OUT*/Bool* decode_ok,
5500 const VexAbiInfo* vbi, Prefix pfx, Long delta )
5502 Int len;
5503 UInt r_src, r_dst;
5504 HChar dis_buf[50];
5505 IRTemp t1, t2;
5507 /* On entry, delta points at the second byte of the insn (the modrm
5508 byte).*/
5509 UChar first_opcode = getUChar(delta-1);
5510 UChar modrm = getUChar(delta+0);
5512 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
5514 if (first_opcode == 0xD8) {
5515 if (modrm < 0xC0) {
5517 /* bits 5,4,3 are an opcode extension, and the modRM also
5518 specifies an address. */
5519 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5520 delta += len;
5522 switch (gregLO3ofRM(modrm)) {
5524 case 0: /* FADD single-real */
5525 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
5526 break;
5528 case 1: /* FMUL single-real */
5529 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
5530 break;
5532 case 2: /* FCOM single-real */
5533 DIP("fcoms %s\n", dis_buf);
5534 /* This forces C1 to zero, which isn't right. */
5535 /* The AMD documentation suggests that forcing C1 to
5536 zero is correct (Eliot Moss) */
5537 put_C3210(
5538 unop( Iop_32Uto64,
5539 binop( Iop_And32,
5540 binop(Iop_Shl32,
5541 binop(Iop_CmpF64,
5542 get_ST(0),
5543 unop(Iop_F32toF64,
5544 loadLE(Ity_F32,mkexpr(addr)))),
5545 mkU8(8)),
5546 mkU32(0x4500)
5547 )));
5548 break;
5550 case 3: /* FCOMP single-real */
5551 /* The AMD documentation suggests that forcing C1 to
5552 zero is correct (Eliot Moss) */
5553 DIP("fcomps %s\n", dis_buf);
5554 /* This forces C1 to zero, which isn't right. */
5555 put_C3210(
5556 unop( Iop_32Uto64,
5557 binop( Iop_And32,
5558 binop(Iop_Shl32,
5559 binop(Iop_CmpF64,
5560 get_ST(0),
5561 unop(Iop_F32toF64,
5562 loadLE(Ity_F32,mkexpr(addr)))),
5563 mkU8(8)),
5564 mkU32(0x4500)
5565 )));
5566 fp_pop();
5567 break;
5569 case 4: /* FSUB single-real */
5570 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
5571 break;
5573 case 5: /* FSUBR single-real */
5574 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
5575 break;
5577 case 6: /* FDIV single-real */
5578 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
5579 break;
5581 case 7: /* FDIVR single-real */
5582 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
5583 break;
5585 default:
5586 vex_printf("unhandled opc_aux = 0x%2x\n",
5587 (UInt)gregLO3ofRM(modrm));
5588 vex_printf("first_opcode == 0xD8\n");
5589 goto decode_fail;
5591 } else {
5592 delta++;
5593 switch (modrm) {
5595 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
5596 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
5597 break;
5599 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
5600 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
5601 break;
5603 /* Dunno if this is right */
5604 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
5605 r_dst = (UInt)modrm - 0xD0;
5606 DIP("fcom %%st(0),%%st(%u)\n", r_dst);
5607 /* This forces C1 to zero, which isn't right. */
5608 put_C3210(
5609 unop(Iop_32Uto64,
5610 binop( Iop_And32,
5611 binop(Iop_Shl32,
5612 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5613 mkU8(8)),
5614 mkU32(0x4500)
5615 )));
5616 break;
5618 /* Dunno if this is right */
5619 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
5620 r_dst = (UInt)modrm - 0xD8;
5621 DIP("fcomp %%st(0),%%st(%u)\n", r_dst);
5622 /* This forces C1 to zero, which isn't right. */
5623 put_C3210(
5624 unop(Iop_32Uto64,
5625 binop( Iop_And32,
5626 binop(Iop_Shl32,
5627 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5628 mkU8(8)),
5629 mkU32(0x4500)
5630 )));
5631 fp_pop();
5632 break;
5634 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
5635 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
5636 break;
5638 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
5639 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
5640 break;
5642 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
5643 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
5644 break;
5646 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
5647 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
5648 break;
5650 default:
5651 goto decode_fail;
5656 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
5657 else
5658 if (first_opcode == 0xD9) {
5659 if (modrm < 0xC0) {
5661 /* bits 5,4,3 are an opcode extension, and the modRM also
5662 specifies an address. */
5663 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5664 delta += len;
5666 switch (gregLO3ofRM(modrm)) {
5668 case 0: /* FLD single-real */
5669 DIP("flds %s\n", dis_buf);
5670 fp_push();
5671 put_ST(0, unop(Iop_F32toF64,
5672 loadLE(Ity_F32, mkexpr(addr))));
5673 break;
5675 case 2: /* FST single-real */
5676 DIP("fsts %s\n", dis_buf);
5677 storeLE(mkexpr(addr),
5678 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5679 break;
5681 case 3: /* FSTP single-real */
5682 DIP("fstps %s\n", dis_buf);
5683 storeLE(mkexpr(addr),
5684 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5685 fp_pop();
5686 break;
5688 case 4: { /* FLDENV m28 */
5689 /* Uses dirty helper:
5690 VexEmNote amd64g_dirtyhelper_FLDENV ( VexGuestAMD64State*, HWord ) */
5691 IRTemp ew = newTemp(Ity_I32);
5692 IRTemp w64 = newTemp(Ity_I64);
5693 IRDirty* d = unsafeIRDirty_0_N (
5694 0/*regparms*/,
5695 "amd64g_dirtyhelper_FLDENV",
5696 &amd64g_dirtyhelper_FLDENV,
5697 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
5699 d->tmp = w64;
5700 /* declare we're reading memory */
5701 d->mFx = Ifx_Read;
5702 d->mAddr = mkexpr(addr);
5703 d->mSize = 28;
5705 /* declare we're writing guest state */
5706 d->nFxState = 4;
5707 vex_bzero(&d->fxState, sizeof(d->fxState));
5709 d->fxState[0].fx = Ifx_Write;
5710 d->fxState[0].offset = OFFB_FTOP;
5711 d->fxState[0].size = sizeof(UInt);
5713 d->fxState[1].fx = Ifx_Write;
5714 d->fxState[1].offset = OFFB_FPTAGS;
5715 d->fxState[1].size = 8 * sizeof(UChar);
5717 d->fxState[2].fx = Ifx_Write;
5718 d->fxState[2].offset = OFFB_FPROUND;
5719 d->fxState[2].size = sizeof(ULong);
5721 d->fxState[3].fx = Ifx_Write;
5722 d->fxState[3].offset = OFFB_FC3210;
5723 d->fxState[3].size = sizeof(ULong);
5725 stmt( IRStmt_Dirty(d) );
5727 /* ew contains any emulation warning we may need to
5728 issue. If needed, side-exit to the next insn,
5729 reporting the warning, so that Valgrind's dispatcher
5730 sees the warning. */
5731 assign(ew, unop(Iop_64to32,mkexpr(w64)) );
5732 put_emwarn( mkexpr(ew) );
5733 stmt(
5734 IRStmt_Exit(
5735 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5736 Ijk_EmWarn,
5737 IRConst_U64( guest_RIP_bbstart+delta ),
5738 OFFB_RIP
5742 DIP("fldenv %s\n", dis_buf);
5743 break;
5746 case 5: {/* FLDCW */
5747 /* The only thing we observe in the control word is the
5748 rounding mode. Therefore, pass the 16-bit value
5749 (x87 native-format control word) to a clean helper,
5750 getting back a 64-bit value, the lower half of which
5751 is the FPROUND value to store, and the upper half of
5752 which is the emulation-warning token which may be
5753 generated. */
5755 /* ULong amd64g_check_fldcw ( ULong ); */
5756 IRTemp t64 = newTemp(Ity_I64);
5757 IRTemp ew = newTemp(Ity_I32);
5758 DIP("fldcw %s\n", dis_buf);
5759 assign( t64, mkIRExprCCall(
5760 Ity_I64, 0/*regparms*/,
5761 "amd64g_check_fldcw",
5762 &amd64g_check_fldcw,
5763 mkIRExprVec_1(
5764 unop( Iop_16Uto64,
5765 loadLE(Ity_I16, mkexpr(addr)))
5770 put_fpround( unop(Iop_64to32, mkexpr(t64)) );
5771 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
5772 put_emwarn( mkexpr(ew) );
5773 /* Finally, if an emulation warning was reported,
5774 side-exit to the next insn, reporting the warning,
5775 so that Valgrind's dispatcher sees the warning. */
5776 stmt(
5777 IRStmt_Exit(
5778 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5779 Ijk_EmWarn,
5780 IRConst_U64( guest_RIP_bbstart+delta ),
5781 OFFB_RIP
5784 break;
5787 case 6: { /* FNSTENV m28 */
5788 /* Uses dirty helper:
5789 void amd64g_dirtyhelper_FSTENV ( VexGuestAMD64State*, HWord ) */
5790 IRDirty* d = unsafeIRDirty_0_N (
5791 0/*regparms*/,
5792 "amd64g_dirtyhelper_FSTENV",
5793 &amd64g_dirtyhelper_FSTENV,
5794 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
5796 /* declare we're writing memory */
5797 d->mFx = Ifx_Write;
5798 d->mAddr = mkexpr(addr);
5799 d->mSize = 28;
5801 /* declare we're reading guest state */
5802 d->nFxState = 4;
5803 vex_bzero(&d->fxState, sizeof(d->fxState));
5805 d->fxState[0].fx = Ifx_Read;
5806 d->fxState[0].offset = OFFB_FTOP;
5807 d->fxState[0].size = sizeof(UInt);
5809 d->fxState[1].fx = Ifx_Read;
5810 d->fxState[1].offset = OFFB_FPTAGS;
5811 d->fxState[1].size = 8 * sizeof(UChar);
5813 d->fxState[2].fx = Ifx_Read;
5814 d->fxState[2].offset = OFFB_FPROUND;
5815 d->fxState[2].size = sizeof(ULong);
5817 d->fxState[3].fx = Ifx_Read;
5818 d->fxState[3].offset = OFFB_FC3210;
5819 d->fxState[3].size = sizeof(ULong);
5821 stmt( IRStmt_Dirty(d) );
5823 DIP("fnstenv %s\n", dis_buf);
5824 break;
5827 case 7: /* FNSTCW */
5828 /* Fake up a native x87 FPU control word. The only
5829 thing it depends on is FPROUND[1:0], so call a clean
5830 helper to cook it up. */
5831 /* ULong amd64g_create_fpucw ( ULong fpround ) */
5832 DIP("fnstcw %s\n", dis_buf);
5833 storeLE(
5834 mkexpr(addr),
5835 unop( Iop_64to16,
5836 mkIRExprCCall(
5837 Ity_I64, 0/*regp*/,
5838 "amd64g_create_fpucw", &amd64g_create_fpucw,
5839 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) )
5843 break;
5845 default:
5846 vex_printf("unhandled opc_aux = 0x%2x\n",
5847 (UInt)gregLO3ofRM(modrm));
5848 vex_printf("first_opcode == 0xD9\n");
5849 goto decode_fail;
5852 } else {
5853 delta++;
5854 switch (modrm) {
5856 case 0xC0 ... 0xC7: /* FLD %st(?) */
5857 r_src = (UInt)modrm - 0xC0;
5858 DIP("fld %%st(%u)\n", r_src);
5859 t1 = newTemp(Ity_F64);
5860 assign(t1, get_ST(r_src));
5861 fp_push();
5862 put_ST(0, mkexpr(t1));
5863 break;
5865 case 0xC8 ... 0xCF: /* FXCH %st(?) */
5866 r_src = (UInt)modrm - 0xC8;
5867 DIP("fxch %%st(%u)\n", r_src);
5868 t1 = newTemp(Ity_F64);
5869 t2 = newTemp(Ity_F64);
5870 assign(t1, get_ST(0));
5871 assign(t2, get_ST(r_src));
5872 put_ST_UNCHECKED(0, mkexpr(t2));
5873 put_ST_UNCHECKED(r_src, mkexpr(t1));
5874 break;
5876 case 0xE0: /* FCHS */
5877 DIP("fchs\n");
5878 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
5879 break;
5881 case 0xE1: /* FABS */
5882 DIP("fabs\n");
5883 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
5884 break;
5886 case 0xE5: { /* FXAM */
5887 /* This is an interesting one. It examines %st(0),
5888 regardless of whether the tag says it's empty or not.
5889 Here, just pass both the tag (in our format) and the
5890 value (as a double, actually a ULong) to a helper
5891 function. */
5892 IRExpr** args
5893 = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)),
5894 unop(Iop_ReinterpF64asI64,
5895 get_ST_UNCHECKED(0)) );
5896 put_C3210(mkIRExprCCall(
5897 Ity_I64,
5898 0/*regparm*/,
5899 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM,
5900 args
5902 DIP("fxam\n");
5903 break;
5906 case 0xE8: /* FLD1 */
5907 DIP("fld1\n");
5908 fp_push();
5909 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
5910 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
5911 break;
5913 case 0xE9: /* FLDL2T */
5914 DIP("fldl2t\n");
5915 fp_push();
5916 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
5917 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
5918 break;
5920 case 0xEA: /* FLDL2E */
5921 DIP("fldl2e\n");
5922 fp_push();
5923 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
5924 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
5925 break;
5927 case 0xEB: /* FLDPI */
5928 DIP("fldpi\n");
5929 fp_push();
5930 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
5931 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
5932 break;
5934 case 0xEC: /* FLDLG2 */
5935 DIP("fldlg2\n");
5936 fp_push();
5937 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
5938 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
5939 break;
5941 case 0xED: /* FLDLN2 */
5942 DIP("fldln2\n");
5943 fp_push();
5944 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
5945 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
5946 break;
5948 case 0xEE: /* FLDZ */
5949 DIP("fldz\n");
5950 fp_push();
5951 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
5952 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
5953 break;
5955 case 0xF0: /* F2XM1 */
5956 DIP("f2xm1\n");
5957 put_ST_UNCHECKED(0,
5958 binop(Iop_2xm1F64,
5959 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5960 get_ST(0)));
5961 break;
5963 case 0xF1: /* FYL2X */
5964 DIP("fyl2x\n");
5965 put_ST_UNCHECKED(1,
5966 triop(Iop_Yl2xF64,
5967 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5968 get_ST(1),
5969 get_ST(0)));
5970 fp_pop();
5971 break;
5973 case 0xF2: { /* FPTAN */
5974 DIP("fptan\n");
5975 IRTemp argD = newTemp(Ity_F64);
5976 assign(argD, get_ST(0));
5977 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
5978 IRTemp resD = newTemp(Ity_F64);
5979 assign(resD,
5980 IRExpr_ITE(
5981 mkexpr(argOK),
5982 binop(Iop_TanF64,
5983 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5984 mkexpr(argD)),
5985 mkexpr(argD))
5987 put_ST_UNCHECKED(0, mkexpr(resD));
5988 /* Conditionally push 1.0 on the stack, if the arg is
5989 in range */
5990 maybe_fp_push(argOK);
5991 maybe_put_ST(argOK, 0,
5992 IRExpr_Const(IRConst_F64(1.0)));
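/* On real hardware FPTAN sets C2 when the operand is out of range,
   leaving ST(0) unchanged and pushing nothing; hence C2 := !argOK
   below, mirroring the conditional push above. */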
5993 set_C2( binop(Iop_Xor64,
5994 unop(Iop_1Uto64, mkexpr(argOK)),
5995 mkU64(1)) );
5996 break;
5999 case 0xF3: /* FPATAN */
6000 DIP("fpatan\n");
6001 put_ST_UNCHECKED(1,
6002 triop(Iop_AtanF64,
6003 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6004 get_ST(1),
6005 get_ST(0)));
6006 fp_pop();
6007 break;
6009 case 0xF4: { /* FXTRACT */
6010 IRTemp argF = newTemp(Ity_F64);
6011 IRTemp sigF = newTemp(Ity_F64);
6012 IRTemp expF = newTemp(Ity_F64);
6013 IRTemp argI = newTemp(Ity_I64);
6014 IRTemp sigI = newTemp(Ity_I64);
6015 IRTemp expI = newTemp(Ity_I64);
6016 DIP("fxtract\n");
6017 assign( argF, get_ST(0) );
6018 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
6019 assign( sigI,
6020 mkIRExprCCall(
6021 Ity_I64, 0/*regparms*/,
6022 "x86amd64g_calculate_FXTRACT",
6023 &x86amd64g_calculate_FXTRACT,
6024 mkIRExprVec_2( mkexpr(argI),
6025 mkIRExpr_HWord(0)/*sig*/ ))
6027 assign( expI,
6028 mkIRExprCCall(
6029 Ity_I64, 0/*regparms*/,
6030 "x86amd64g_calculate_FXTRACT",
6031 &x86amd64g_calculate_FXTRACT,
6032 mkIRExprVec_2( mkexpr(argI),
6033 mkIRExpr_HWord(1)/*exp*/ ))
6035 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
6036 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
6037 /* exponent */
6038 put_ST_UNCHECKED(0, mkexpr(expF) );
6039 fp_push();
6040 /* significand */
6041 put_ST(0, mkexpr(sigF) );
6042 break;
6045 case 0xF5: { /* FPREM1 -- IEEE compliant */
6046 IRTemp a1 = newTemp(Ity_F64);
6047 IRTemp a2 = newTemp(Ity_F64);
6048 DIP("fprem1\n");
6049 /* Do FPREM1 twice, once to get the remainder, and once
6050 to get the C3210 flag values. */
6051 assign( a1, get_ST(0) );
6052 assign( a2, get_ST(1) );
6053 put_ST_UNCHECKED(0,
6054 triop(Iop_PRem1F64,
6055 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6056 mkexpr(a1),
6057 mkexpr(a2)));
6058 put_C3210(
6059 unop(Iop_32Uto64,
6060 triop(Iop_PRem1C3210F64,
6061 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6062 mkexpr(a1),
6063 mkexpr(a2)) ));
6064 break;
6067 case 0xF7: /* FINCSTP */
6068 DIP("fincstp\n");
6069 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
6070 break;
6072 case 0xF8: { /* FPREM -- not IEEE compliant */
6073 IRTemp a1 = newTemp(Ity_F64);
6074 IRTemp a2 = newTemp(Ity_F64);
6075 DIP("fprem\n");
6076 /* Do FPREM twice, once to get the remainder, and once
6077 to get the C3210 flag values. */
6078 assign( a1, get_ST(0) );
6079 assign( a2, get_ST(1) );
6080 put_ST_UNCHECKED(0,
6081 triop(Iop_PRemF64,
6082 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6083 mkexpr(a1),
6084 mkexpr(a2)));
6085 put_C3210(
6086 unop(Iop_32Uto64,
6087 triop(Iop_PRemC3210F64,
6088 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6089 mkexpr(a1),
6090 mkexpr(a2)) ));
6091 break;
6094 case 0xF9: /* FYL2XP1 */
6095 DIP("fyl2xp1\n");
6096 put_ST_UNCHECKED(1,
6097 triop(Iop_Yl2xp1F64,
6098 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6099 get_ST(1),
6100 get_ST(0)));
6101 fp_pop();
6102 break;
6104 case 0xFA: /* FSQRT */
6105 DIP("fsqrt\n");
6106 put_ST_UNCHECKED(0,
6107 binop(Iop_SqrtF64,
6108 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6109 get_ST(0)));
6110 break;
6112 case 0xFB: { /* FSINCOS */
6113 DIP("fsincos\n");
6114 IRTemp argD = newTemp(Ity_F64);
6115 assign(argD, get_ST(0));
6116 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
6117 IRTemp resD = newTemp(Ity_F64);
6118 assign(resD,
6119 IRExpr_ITE(
6120 mkexpr(argOK),
6121 binop(Iop_SinF64,
6122 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6123 mkexpr(argD)),
6124 mkexpr(argD))
6126 put_ST_UNCHECKED(0, mkexpr(resD));
6127 /* Conditionally push the cos value on the stack, if
6128 the arg is in range */
6129 maybe_fp_push(argOK);
6130 maybe_put_ST(argOK, 0,
6131 binop(Iop_CosF64,
6132 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6133 mkexpr(argD)));
6134 set_C2( binop(Iop_Xor64,
6135 unop(Iop_1Uto64, mkexpr(argOK)),
6136 mkU64(1)) );
6137 break;
6140 case 0xFC: /* FRNDINT */
6141 DIP("frndint\n");
6142 put_ST_UNCHECKED(0,
6143 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
6144 break;
6146 case 0xFD: /* FSCALE */
6147 DIP("fscale\n");
6148 put_ST_UNCHECKED(0,
6149 triop(Iop_ScaleF64,
6150 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6151 get_ST(0),
6152 get_ST(1)));
6153 break;
6155 case 0xFE: /* FSIN */
6156 case 0xFF: { /* FCOS */
6157 Bool isSIN = modrm == 0xFE;
6158 DIP("%s\n", isSIN ? "fsin" : "fcos");
6159 IRTemp argD = newTemp(Ity_F64);
6160 assign(argD, get_ST(0));
6161 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
6162 IRTemp resD = newTemp(Ity_F64);
6163 assign(resD,
6164 IRExpr_ITE(
6165 mkexpr(argOK),
6166 binop(isSIN ? Iop_SinF64 : Iop_CosF64,
6167 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6168 mkexpr(argD)),
6169 mkexpr(argD))
6171 put_ST_UNCHECKED(0, mkexpr(resD));
6172 set_C2( binop(Iop_Xor64,
6173 unop(Iop_1Uto64, mkexpr(argOK)),
6174 mkU64(1)) );
6175 break;
6178 default:
6179 goto decode_fail;
6184 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
6185 else
6186 if (first_opcode == 0xDA) {
6188 if (modrm < 0xC0) {
6190 /* bits 5,4,3 are an opcode extension, and the modRM also
6191 specifies an address. */
6192 IROp fop;
6193 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6194 delta += len;
6195 switch (gregLO3ofRM(modrm)) {
6197 case 0: /* FIADD m32int */ /* ST(0) += m32int */
6198 DIP("fiaddl %s\n", dis_buf);
6199 fop = Iop_AddF64;
6200 goto do_fop_m32;
6202 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
6203 DIP("fimull %s\n", dis_buf);
6204 fop = Iop_MulF64;
6205 goto do_fop_m32;
6207 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
6208 DIP("fisubl %s\n", dis_buf);
6209 fop = Iop_SubF64;
6210 goto do_fop_m32;
6212 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
6213 DIP("fisubrl %s\n", dis_buf);
6214 fop = Iop_SubF64;
6215 goto do_foprev_m32;
6217 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
6218 DIP("fisubl %s\n", dis_buf);
6219 fop = Iop_DivF64;
6220 goto do_fop_m32;
6222 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
6223 DIP("fidivrl %s\n", dis_buf);
6224 fop = Iop_DivF64;
6225 goto do_foprev_m32;
6227 do_fop_m32:
6228 put_ST_UNCHECKED(0,
6229 triop(fop,
6230 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6231 get_ST(0),
6232 unop(Iop_I32StoF64,
6233 loadLE(Ity_I32, mkexpr(addr)))));
6234 break;
6236 do_foprev_m32:
6237 put_ST_UNCHECKED(0,
6238 triop(fop,
6239 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6240 unop(Iop_I32StoF64,
6241 loadLE(Ity_I32, mkexpr(addr))),
6242 get_ST(0)));
6243 break;
6245 default:
6246 vex_printf("unhandled opc_aux = 0x%2x\n",
6247 (UInt)gregLO3ofRM(modrm));
6248 vex_printf("first_opcode == 0xDA\n");
6249 goto decode_fail;
6252 } else {
6254 delta++;
6255 switch (modrm) {
6257 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
6258 r_src = (UInt)modrm - 0xC0;
6259 DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
6260 put_ST_UNCHECKED(0,
6261 IRExpr_ITE(
6262 mk_amd64g_calculate_condition(AMD64CondB),
6263 get_ST(r_src), get_ST(0)) );
6264 break;
6266 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
6267 r_src = (UInt)modrm - 0xC8;
6268 DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
6269 put_ST_UNCHECKED(0,
6270 IRExpr_ITE(
6271 mk_amd64g_calculate_condition(AMD64CondZ),
6272 get_ST(r_src), get_ST(0)) );
6273 break;
6275 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
6276 r_src = (UInt)modrm - 0xD0;
6277 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
6278 put_ST_UNCHECKED(0,
6279 IRExpr_ITE(
6280 mk_amd64g_calculate_condition(AMD64CondBE),
6281 get_ST(r_src), get_ST(0)) );
6282 break;
6284 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
6285 r_src = (UInt)modrm - 0xD8;
6286 DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
6287 put_ST_UNCHECKED(0,
6288 IRExpr_ITE(
6289 mk_amd64g_calculate_condition(AMD64CondP),
6290 get_ST(r_src), get_ST(0)) );
6291 break;
6293 case 0xE9: /* FUCOMPP %st(0),%st(1) */
6294 DIP("fucompp %%st(0),%%st(1)\n");
6295 /* This forces C1 to zero, which isn't right. */
6296 put_C3210(
6297 unop(Iop_32Uto64,
6298 binop( Iop_And32,
6299 binop(Iop_Shl32,
6300 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
6301 mkU8(8)),
6302 mkU32(0x4500)
6303 )));
6304 fp_pop();
6305 fp_pop();
6306 break;
6308 default:
6309 goto decode_fail;
6315 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
6316 else
6317 if (first_opcode == 0xDB) {
6318 if (modrm < 0xC0) {
6320 /* bits 5,4,3 are an opcode extension, and the modRM also
6321 specifies an address. */
6322 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6323 delta += len;
6325 switch (gregLO3ofRM(modrm)) {
6327 case 0: /* FILD m32int */
6328 DIP("fildl %s\n", dis_buf);
6329 fp_push();
6330 put_ST(0, unop(Iop_I32StoF64,
6331 loadLE(Ity_I32, mkexpr(addr))));
6332 break;
6334 case 1: /* FISTTPL m32 (SSE3) */
6335 DIP("fisttpl %s\n", dis_buf);
6336 storeLE( mkexpr(addr),
6337 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
6338 fp_pop();
6339 break;
6341 case 2: /* FIST m32 */
6342 DIP("fistl %s\n", dis_buf);
6343 storeLE( mkexpr(addr),
6344 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
6345 break;
6347 case 3: /* FISTP m32 */
6348 DIP("fistpl %s\n", dis_buf);
6349 storeLE( mkexpr(addr),
6350 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
6351 fp_pop();
6352 break;
6354 case 5: { /* FLD extended-real */
6355 /* Uses dirty helper:
6356 ULong amd64g_loadF80le ( ULong )
6357 addr holds the address. First, do a dirty call to
6358 get hold of the data. */
6359 IRTemp val = newTemp(Ity_I64);
6360 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
6362 IRDirty* d = unsafeIRDirty_1_N (
6363 val,
6364 0/*regparms*/,
6365 "amd64g_dirtyhelper_loadF80le",
6366 &amd64g_dirtyhelper_loadF80le,
6367 args
6369 /* declare that we're reading memory */
6370 d->mFx = Ifx_Read;
6371 d->mAddr = mkexpr(addr);
6372 d->mSize = 10;
6374 /* execute the dirty call, dumping the result in val. */
6375 stmt( IRStmt_Dirty(d) );
6376 fp_push();
6377 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
6379 DIP("fldt %s\n", dis_buf);
6380 break;
6383 case 7: { /* FSTP extended-real */
6384 /* Uses dirty helper:
6385 void amd64g_storeF80le ( ULong addr, ULong data ) */
6387 IRExpr** args
6388 = mkIRExprVec_2( mkexpr(addr),
6389 unop(Iop_ReinterpF64asI64, get_ST(0)) );
6391 IRDirty* d = unsafeIRDirty_0_N (
6392 0/*regparms*/,
6393 "amd64g_dirtyhelper_storeF80le",
6394 &amd64g_dirtyhelper_storeF80le,
6395 args
6397 /* declare we're writing memory */
6398 d->mFx = Ifx_Write;
6399 d->mAddr = mkexpr(addr);
6400 d->mSize = 10;
6402 /* execute the dirty call. */
6403 stmt( IRStmt_Dirty(d) );
6404 fp_pop();
6406 DIP("fstpt\n %s", dis_buf);
6407 break;
6410 default:
6411 vex_printf("unhandled opc_aux = 0x%2x\n",
6412 (UInt)gregLO3ofRM(modrm));
6413 vex_printf("first_opcode == 0xDB\n");
6414 goto decode_fail;
6417 } else {
6419 delta++;
6420 switch (modrm) {
6422 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
6423 r_src = (UInt)modrm - 0xC0;
6424 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
6425 put_ST_UNCHECKED(0,
6426 IRExpr_ITE(
6427 mk_amd64g_calculate_condition(AMD64CondNB),
6428 get_ST(r_src), get_ST(0)) );
6429 break;
6431 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
6432 r_src = (UInt)modrm - 0xC8;
6433 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
6434 put_ST_UNCHECKED(
6436 IRExpr_ITE(
6437 mk_amd64g_calculate_condition(AMD64CondNZ),
6438 get_ST(r_src),
6439 get_ST(0)
6442 break;
6444 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
6445 r_src = (UInt)modrm - 0xD0;
6446 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
6447 put_ST_UNCHECKED(
6449 IRExpr_ITE(
6450 mk_amd64g_calculate_condition(AMD64CondNBE),
6451 get_ST(r_src),
6452 get_ST(0)
6455 break;
6457 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
6458 r_src = (UInt)modrm - 0xD8;
6459 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src);
6460 put_ST_UNCHECKED(
6462 IRExpr_ITE(
6463 mk_amd64g_calculate_condition(AMD64CondNP),
6464 get_ST(r_src),
6465 get_ST(0)
6468 break;
6470 case 0xE2:
6471 DIP("fnclex\n");
6472 break;
6474 case 0xE3: {
6475 gen_FINIT_SEQUENCE(NULL/*no guarding condition*/);
6476 DIP("fninit\n");
6477 break;
6480 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
6481 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
6482 break;
6484 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
6485 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
6486 break;
6488 default:
6489 goto decode_fail;
6494 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
6495 else
6496 if (first_opcode == 0xDC) {
6497 if (modrm < 0xC0) {
6499 /* bits 5,4,3 are an opcode extension, and the modRM also
6500 specifies an address. */
6501 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6502 delta += len;
6504 switch (gregLO3ofRM(modrm)) {
6506 case 0: /* FADD double-real */
6507 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
6508 break;
6510 case 1: /* FMUL double-real */
6511 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
6512 break;
6514 case 2: /* FCOM double-real */
6515 DIP("fcoml %s\n", dis_buf);
6516 /* This forces C1 to zero, which isn't right. */
6517 put_C3210(
6518 unop(Iop_32Uto64,
6519 binop( Iop_And32,
6520 binop(Iop_Shl32,
6521 binop(Iop_CmpF64,
6522 get_ST(0),
6523 loadLE(Ity_F64,mkexpr(addr))),
6524 mkU8(8)),
6525 mkU32(0x4500)
6526 )));
6527 break;
6529 case 3: /* FCOMP double-real */
6530 DIP("fcompl %s\n", dis_buf);
6531 /* This forces C1 to zero, which isn't right. */
6532 put_C3210(
6533 unop(Iop_32Uto64,
6534 binop( Iop_And32,
6535 binop(Iop_Shl32,
6536 binop(Iop_CmpF64,
6537 get_ST(0),
6538 loadLE(Ity_F64,mkexpr(addr))),
6539 mkU8(8)),
6540 mkU32(0x4500)
6541 )));
6542 fp_pop();
6543 break;
6545 case 4: /* FSUB double-real */
6546 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
6547 break;
6549 case 5: /* FSUBR double-real */
6550 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
6551 break;
6553 case 6: /* FDIV double-real */
6554 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
6555 break;
6557 case 7: /* FDIVR double-real */
6558 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
6559 break;
6561 default:
6562 vex_printf("unhandled opc_aux = 0x%2x\n",
6563 (UInt)gregLO3ofRM(modrm));
6564 vex_printf("first_opcode == 0xDC\n");
6565 goto decode_fail;
6568 } else {
6570 delta++;
6571 switch (modrm) {
6573 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
6574 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
6575 break;
6577 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
6578 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
6579 break;
6581 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
6582 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
6583 break;
6585 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
6586 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
6587 break;
6589 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
6590 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
6591 break;
6593 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
6594 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
6595 break;
6597 default:
6598 goto decode_fail;
6604 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
6605 else
6606 if (first_opcode == 0xDD) {
6608 if (modrm < 0xC0) {
6610 /* bits 5,4,3 are an opcode extension, and the modRM also
6611 specifies an address. */
6612 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6613 delta += len;
6615 switch (gregLO3ofRM(modrm)) {
6617 case 0: /* FLD double-real */
6618 DIP("fldl %s\n", dis_buf);
6619 fp_push();
6620 put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
6621 break;
6623 case 1: /* FISTTPQ m64 (SSE3) */
6624 DIP("fistppll %s\n", dis_buf);
6625 storeLE( mkexpr(addr),
6626 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
6627 fp_pop();
6628 break;
6630 case 2: /* FST double-real */
6631 DIP("fstl %s\n", dis_buf);
6632 storeLE(mkexpr(addr), get_ST(0));
6633 break;
6635 case 3: /* FSTP double-real */
6636 DIP("fstpl %s\n", dis_buf);
6637 storeLE(mkexpr(addr), get_ST(0));
6638 fp_pop();
6639 break;
6641 case 4: { /* FRSTOR m94/m108 */
6642 IRTemp ew = newTemp(Ity_I32);
6643 IRTemp w64 = newTemp(Ity_I64);
6644 IRDirty* d;
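/* With a 66 (operand-size) prefix the environment image uses the
   16-bit layout: 14 bytes of environment plus 80 bytes of register
   stack = 94 bytes. Otherwise it is the 28-byte layout, giving
   28 + 80 = 108 bytes. */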
6645 if ( have66(pfx) ) {
6646 /* Uses dirty helper:
6647 VexEmNote amd64g_dirtyhelper_FRSTORS
6648 ( VexGuestAMD64State*, HWord ) */
6649 d = unsafeIRDirty_0_N (
6650 0/*regparms*/,
6651 "amd64g_dirtyhelper_FRSTORS",
6652 &amd64g_dirtyhelper_FRSTORS,
6653 mkIRExprVec_1( mkexpr(addr) )
6655 d->mSize = 94;
6656 } else {
6657 /* Uses dirty helper:
6658 VexEmNote amd64g_dirtyhelper_FRSTOR
6659 ( VexGuestAMD64State*, HWord ) */
6660 d = unsafeIRDirty_0_N (
6661 0/*regparms*/,
6662 "amd64g_dirtyhelper_FRSTOR",
6663 &amd64g_dirtyhelper_FRSTOR,
6664 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
6666 d->mSize = 108;
6669 d->tmp = w64;
6670 /* declare we're reading memory */
6671 d->mFx = Ifx_Read;
6672 d->mAddr = mkexpr(addr);
6673 /* d->mSize set above */
6675 /* declare we're writing guest state */
6676 d->nFxState = 5;
6677 vex_bzero(&d->fxState, sizeof(d->fxState));
6679 d->fxState[0].fx = Ifx_Write;
6680 d->fxState[0].offset = OFFB_FTOP;
6681 d->fxState[0].size = sizeof(UInt);
6683 d->fxState[1].fx = Ifx_Write;
6684 d->fxState[1].offset = OFFB_FPREGS;
6685 d->fxState[1].size = 8 * sizeof(ULong);
6687 d->fxState[2].fx = Ifx_Write;
6688 d->fxState[2].offset = OFFB_FPTAGS;
6689 d->fxState[2].size = 8 * sizeof(UChar);
6691 d->fxState[3].fx = Ifx_Write;
6692 d->fxState[3].offset = OFFB_FPROUND;
6693 d->fxState[3].size = sizeof(ULong);
6695 d->fxState[4].fx = Ifx_Write;
6696 d->fxState[4].offset = OFFB_FC3210;
6697 d->fxState[4].size = sizeof(ULong);
6699 stmt( IRStmt_Dirty(d) );
6701 /* ew contains any emulation warning we may need to
6702 issue. If needed, side-exit to the next insn,
6703 reporting the warning, so that Valgrind's dispatcher
6704 sees the warning. */
6705 assign(ew, unop(Iop_64to32,mkexpr(w64)) );
6706 put_emwarn( mkexpr(ew) );
6707 stmt(
6708 IRStmt_Exit(
6709 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
6710 Ijk_EmWarn,
6711 IRConst_U64( guest_RIP_bbstart+delta ),
6712 OFFB_RIP
6716 if ( have66(pfx) ) {
6717 DIP("frstors %s\n", dis_buf);
6718 } else {
6719 DIP("frstor %s\n", dis_buf);
6721 break;
6724 case 6: { /* FNSAVE m94/m108 */
6725 IRDirty *d;
6726 if ( have66(pfx) ) {
6727 /* Uses dirty helper:
6728 void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*,
6729 HWord ) */
6730 d = unsafeIRDirty_0_N (
6731 0/*regparms*/,
6732 "amd64g_dirtyhelper_FNSAVES",
6733 &amd64g_dirtyhelper_FNSAVES,
6734 mkIRExprVec_1( mkexpr(addr) )
6736 d->mSize = 94;
6737 } else {
6738 /* Uses dirty helper:
6739 void amd64g_dirtyhelper_FNSAVE ( VexGuestAMD64State*,
6740 HWord ) */
6741 d = unsafeIRDirty_0_N (
6742 0/*regparms*/,
6743 "amd64g_dirtyhelper_FNSAVE",
6744 &amd64g_dirtyhelper_FNSAVE,
6745 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
6747 d->mSize = 108;
6750 /* declare we're writing memory */
6751 d->mFx = Ifx_Write;
6752 d->mAddr = mkexpr(addr);
6753 /* d->mSize set above */
6755 /* declare we're reading guest state */
6756 d->nFxState = 5;
6757 vex_bzero(&d->fxState, sizeof(d->fxState));
6759 d->fxState[0].fx = Ifx_Read;
6760 d->fxState[0].offset = OFFB_FTOP;
6761 d->fxState[0].size = sizeof(UInt);
6763 d->fxState[1].fx = Ifx_Read;
6764 d->fxState[1].offset = OFFB_FPREGS;
6765 d->fxState[1].size = 8 * sizeof(ULong);
6767 d->fxState[2].fx = Ifx_Read;
6768 d->fxState[2].offset = OFFB_FPTAGS;
6769 d->fxState[2].size = 8 * sizeof(UChar);
6771 d->fxState[3].fx = Ifx_Read;
6772 d->fxState[3].offset = OFFB_FPROUND;
6773 d->fxState[3].size = sizeof(ULong);
6775 d->fxState[4].fx = Ifx_Read;
6776 d->fxState[4].offset = OFFB_FC3210;
6777 d->fxState[4].size = sizeof(ULong);
6779 stmt( IRStmt_Dirty(d) );
6781 if ( have66(pfx) ) {
6782 DIP("fnsaves %s\n", dis_buf);
6783 } else {
6784 DIP("fnsave %s\n", dis_buf);
6786 break;
6789 case 7: { /* FNSTSW m16 */
6790 IRExpr* sw = get_FPU_sw();
6791 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
6792 storeLE( mkexpr(addr), sw );
6793 DIP("fnstsw %s\n", dis_buf);
6794 break;
6797 default:
6798 vex_printf("unhandled opc_aux = 0x%2x\n",
6799 (UInt)gregLO3ofRM(modrm));
6800 vex_printf("first_opcode == 0xDD\n");
6801 goto decode_fail;
6803 } else {
6804 delta++;
6805 switch (modrm) {
6807 case 0xC0 ... 0xC7: /* FFREE %st(?) */
6808 r_dst = (UInt)modrm - 0xC0;
6809 DIP("ffree %%st(%u)\n", r_dst);
6810 put_ST_TAG ( r_dst, mkU8(0) );
6811 break;
6813 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
6814 r_dst = (UInt)modrm - 0xD0;
6815 DIP("fst %%st(0),%%st(%u)\n", r_dst);
6816 /* P4 manual says: "If the destination operand is a
6817 non-empty register, the invalid-operation exception
6818 is not generated." Hence put_ST_UNCHECKED. */
6819 put_ST_UNCHECKED(r_dst, get_ST(0));
6820 break;
6822 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
6823 r_dst = (UInt)modrm - 0xD8;
6824 DIP("fstp %%st(0),%%st(%u)\n", r_dst);
6825 /* P4 manual says: "If the destination operand is a
6826 non-empty register, the invalid-operation exception
6827 is not generated." Hence put_ST_UNCHECKED. */
6828 put_ST_UNCHECKED(r_dst, get_ST(0));
6829 fp_pop();
6830 break;
6832 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
6833 r_dst = (UInt)modrm - 0xE0;
6834 DIP("fucom %%st(0),%%st(%u)\n", r_dst);
6835 /* This forces C1 to zero, which isn't right. */
6836 put_C3210(
6837 unop(Iop_32Uto64,
6838 binop( Iop_And32,
6839 binop(Iop_Shl32,
6840 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6841 mkU8(8)),
6842 mkU32(0x4500)
6843 )));
6844 break;
6846 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
6847 r_dst = (UInt)modrm - 0xE8;
6848 DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
6849 /* This forces C1 to zero, which isn't right. */
6850 put_C3210(
6851 unop(Iop_32Uto64,
6852 binop( Iop_And32,
6853 binop(Iop_Shl32,
6854 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6855 mkU8(8)),
6856 mkU32(0x4500)
6857 )));
6858 fp_pop();
6859 break;
6861 default:
6862 goto decode_fail;
6867 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
6868 else
6869 if (first_opcode == 0xDE) {
6871 if (modrm < 0xC0) {
6873 /* bits 5,4,3 are an opcode extension, and the modRM also
6874 specifies an address. */
6875 IROp fop;
6876 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6877 delta += len;
6879 switch (gregLO3ofRM(modrm)) {
6881 case 0: /* FIADD m16int */ /* ST(0) += m16int */
6882 DIP("fiaddw %s\n", dis_buf);
6883 fop = Iop_AddF64;
6884 goto do_fop_m16;
6886 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
6887 DIP("fimulw %s\n", dis_buf);
6888 fop = Iop_MulF64;
6889 goto do_fop_m16;
6891 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
6892 DIP("fisubw %s\n", dis_buf);
6893 fop = Iop_SubF64;
6894 goto do_fop_m16;
6896 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
6897 DIP("fisubrw %s\n", dis_buf);
6898 fop = Iop_SubF64;
6899 goto do_foprev_m16;
6901 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
6902 DIP("fisubw %s\n", dis_buf);
6903 fop = Iop_DivF64;
6904 goto do_fop_m16;
6906 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
6907 DIP("fidivrw %s\n", dis_buf);
6908 fop = Iop_DivF64;
6909 goto do_foprev_m16;
6911 do_fop_m16:
6912 put_ST_UNCHECKED(0,
6913 triop(fop,
6914 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6915 get_ST(0),
6916 unop(Iop_I32StoF64,
6917 unop(Iop_16Sto32,
6918 loadLE(Ity_I16, mkexpr(addr))))));
6919 break;
6921 do_foprev_m16:
6922 put_ST_UNCHECKED(0,
6923 triop(fop,
6924 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6925 unop(Iop_I32StoF64,
6926 unop(Iop_16Sto32,
6927 loadLE(Ity_I16, mkexpr(addr)))),
6928 get_ST(0)));
6929 break;
6931 default:
6932 vex_printf("unhandled opc_aux = 0x%2x\n",
6933 (UInt)gregLO3ofRM(modrm));
6934 vex_printf("first_opcode == 0xDE\n");
6935 goto decode_fail;
6938 } else {
6940 delta++;
6941 switch (modrm) {
6943 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
6944 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
6945 break;
6947 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
6948 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
6949 break;
6951 case 0xD9: /* FCOMPP %st(0),%st(1) */
6952 DIP("fcompp %%st(0),%%st(1)\n");
6953 /* This forces C1 to zero, which isn't right. */
6954 put_C3210(
6955 unop(Iop_32Uto64,
6956 binop( Iop_And32,
6957 binop(Iop_Shl32,
6958 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
6959 mkU8(8)),
6960 mkU32(0x4500)
6961 )));
6962 fp_pop();
6963 fp_pop();
6964 break;
6966 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
6967 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
6968 break;
6970 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
6971 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
6972 break;
6974 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
6975 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
6976 break;
6978 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
6979 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
6980 break;
6982 default:
6983 goto decode_fail;
6989 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
6990 else
6991 if (first_opcode == 0xDF) {
6993 if (modrm < 0xC0) {
6995 /* bits 5,4,3 are an opcode extension, and the modRM also
6996 specifies an address. */
6997 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6998 delta += len;
7000 switch (gregLO3ofRM(modrm)) {
7002 case 0: /* FILD m16int */
7003 DIP("fildw %s\n", dis_buf);
7004 fp_push();
7005 put_ST(0, unop(Iop_I32StoF64,
7006 unop(Iop_16Sto32,
7007 loadLE(Ity_I16, mkexpr(addr)))));
7008 break;
7010 case 1: /* FISTTPS m16 (SSE3) */
7011 DIP("fisttps %s\n", dis_buf);
7012 storeLE( mkexpr(addr),
7013 x87ishly_qnarrow_32_to_16(
7014 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ));
7015 fp_pop();
7016 break;
7018 case 2: /* FIST m16 */
7019 DIP("fists %s\n", dis_buf);
7020 storeLE( mkexpr(addr),
7021 x87ishly_qnarrow_32_to_16(
7022 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
7023 break;
7025 case 3: /* FISTP m16 */
7026 DIP("fistps %s\n", dis_buf);
7027 storeLE( mkexpr(addr),
7028 x87ishly_qnarrow_32_to_16(
7029 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
7030 fp_pop();
7031 break;
7033 case 5: /* FILD m64 */
7034 DIP("fildll %s\n", dis_buf);
7035 fp_push();
7036 put_ST(0, binop(Iop_I64StoF64,
7037 get_roundingmode(),
7038 loadLE(Ity_I64, mkexpr(addr))));
7039 break;
7041 case 7: /* FISTP m64 */
7042 DIP("fistpll %s\n", dis_buf);
7043 storeLE( mkexpr(addr),
7044 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
7045 fp_pop();
7046 break;
7048 default:
7049 vex_printf("unhandled opc_aux = 0x%2x\n",
7050 (UInt)gregLO3ofRM(modrm));
7051 vex_printf("first_opcode == 0xDF\n");
7052 goto decode_fail;
7055 } else {
7057 delta++;
7058 switch (modrm) {
7060 case 0xC0: /* FFREEP %st(0) */
7061 DIP("ffreep %%st(%d)\n", 0);
7062 put_ST_TAG ( 0, mkU8(0) );
7063 fp_pop();
7064 break;
7066 case 0xE0: /* FNSTSW %ax */
7067 DIP("fnstsw %%ax\n");
7068 /* Invent a plausible-looking FPU status word value and
7069 dump it in %AX:
7070 ((ftop & 7) << 11) | (c3210 & 0x4700)
7072 putIRegRAX(
7074 unop(Iop_32to16,
7075 binop(Iop_Or32,
7076 binop(Iop_Shl32,
7077 binop(Iop_And32, get_ftop(), mkU32(7)),
7078 mkU8(11)),
7079 binop(Iop_And32,
7080 unop(Iop_64to32, get_C3210()),
7081 mkU32(0x4700))
7082 )));
7083 break;
7085 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
7086 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
7087 break;
7089 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
7090 /* not really right since COMIP != UCOMIP */
7091 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
7092 break;
7094 default:
7095 goto decode_fail;
7101 else
7102 goto decode_fail;
7104 *decode_ok = True;
7105 return delta;
7107 decode_fail:
7108 *decode_ok = False;
7109 return delta;
7113 /*------------------------------------------------------------*/
7114 /*--- ---*/
7115 /*--- MMX INSTRUCTIONS ---*/
7116 /*--- ---*/
7117 /*------------------------------------------------------------*/
7119 /* Effect of MMX insns on x87 FPU state (table 11-2 of
7120 IA32 arch manual, volume 3):
7122 Read from, or write to MMX register (viz, any insn except EMMS):
7123 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
7124 * FP stack pointer set to zero
7126 EMMS:
7127 * All tags set to Invalid (empty) -- FPTAGS[i] := zero
7128 * FP stack pointer set to zero */
7131 static void do_MMX_preamble ( void )
7133 Int i;
7134 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
7135 IRExpr* zero = mkU32(0);
7136 IRExpr* tag1 = mkU8(1);
7137 put_ftop(zero);
7138 for (i = 0; i < 8; i++)
7139 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
7142 static void do_EMMS_preamble ( void )
7144 Int i;
7145 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
7146 IRExpr* zero = mkU32(0);
7147 IRExpr* tag0 = mkU8(0);
7148 put_ftop(zero);
7149 for (i = 0; i < 8; i++)
7150 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
7154 static IRExpr* getMMXReg ( UInt archreg )
7156 vassert(archreg < 8);
7157 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
7161 static void putMMXReg ( UInt archreg, IRExpr* e )
7163 vassert(archreg < 8);
7164 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
7165 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
7169 /* Helper for non-shift MMX insns. Note this is incomplete in the
7170 sense that it does not first call do_MMX_preamble() -- that is the
7171 responsibility of its caller. */
7173 static
7174 ULong dis_MMXop_regmem_to_reg ( const VexAbiInfo* vbi,
7175 Prefix pfx,
7176 Long delta,
7177 UChar opc,
7178 const HChar* name,
7179 Bool show_granularity )
7181 HChar dis_buf[50];
7182 UChar modrm = getUChar(delta);
7183 Bool isReg = epartIsReg(modrm);
7184 IRExpr* argL = NULL;
7185 IRExpr* argR = NULL;
7186 IRExpr* argG = NULL;
7187 IRExpr* argE = NULL;
7188 IRTemp res = newTemp(Ity_I64);
7190 Bool invG = False;
7191 IROp op = Iop_INVALID;
7192 void* hAddr = NULL;
7193 const HChar* hName = NULL;
7194 Bool eLeft = False;
7196 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
7198 switch (opc) {
7199 /* Original MMX ones */
7200 case 0xFC: op = Iop_Add8x8; break;
7201 case 0xFD: op = Iop_Add16x4; break;
7202 case 0xFE: op = Iop_Add32x2; break;
7204 case 0xEC: op = Iop_QAdd8Sx8; break;
7205 case 0xED: op = Iop_QAdd16Sx4; break;
7207 case 0xDC: op = Iop_QAdd8Ux8; break;
7208 case 0xDD: op = Iop_QAdd16Ux4; break;
7210 case 0xF8: op = Iop_Sub8x8; break;
7211 case 0xF9: op = Iop_Sub16x4; break;
7212 case 0xFA: op = Iop_Sub32x2; break;
7214 case 0xE8: op = Iop_QSub8Sx8; break;
7215 case 0xE9: op = Iop_QSub16Sx4; break;
7217 case 0xD8: op = Iop_QSub8Ux8; break;
7218 case 0xD9: op = Iop_QSub16Ux4; break;
7220 case 0xE5: op = Iop_MulHi16Sx4; break;
7221 case 0xD5: op = Iop_Mul16x4; break;
7222 case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;
7224 case 0x74: op = Iop_CmpEQ8x8; break;
7225 case 0x75: op = Iop_CmpEQ16x4; break;
7226 case 0x76: op = Iop_CmpEQ32x2; break;
7228 case 0x64: op = Iop_CmpGT8Sx8; break;
7229 case 0x65: op = Iop_CmpGT16Sx4; break;
7230 case 0x66: op = Iop_CmpGT32Sx2; break;
7232 case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
7233 case 0x63: op = Iop_QNarrowBin16Sto8Sx8; eLeft = True; break;
7234 case 0x67: op = Iop_QNarrowBin16Sto8Ux8; eLeft = True; break;
7236 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break;
7237 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
7238 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;
7240 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break;
7241 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
7242 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;
7244 case 0xDB: op = Iop_And64; break;
7245 case 0xDF: op = Iop_And64; invG = True; break;
7246 case 0xEB: op = Iop_Or64; break;
7247 case 0xEF: /* Possibly do better here if argL and argR are the
7248 same reg */
7249 op = Iop_Xor64; break;
7251 /* Introduced in SSE1 */
7252 case 0xE0: op = Iop_Avg8Ux8; break;
7253 case 0xE3: op = Iop_Avg16Ux4; break;
7254 case 0xEE: op = Iop_Max16Sx4; break;
7255 case 0xDE: op = Iop_Max8Ux8; break;
7256 case 0xEA: op = Iop_Min16Sx4; break;
7257 case 0xDA: op = Iop_Min8Ux8; break;
7258 case 0xE4: op = Iop_MulHi16Ux4; break;
7259 case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;
7261 /* Introduced in SSE2 */
7262 case 0xD4: op = Iop_Add64; break;
7263 case 0xFB: op = Iop_Sub64; break;
7265 default:
7266 vex_printf("\n0x%x\n", (UInt)opc);
7267 vpanic("dis_MMXop_regmem_to_reg");
7270 # undef XXX
7272 argG = getMMXReg(gregLO3ofRM(modrm));
7273 if (invG)
7274 argG = unop(Iop_Not64, argG);
7276 if (isReg) {
7277 delta++;
7278 argE = getMMXReg(eregLO3ofRM(modrm));
7279 } else {
7280 Int len;
7281 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7282 delta += len;
7283 argE = loadLE(Ity_I64, mkexpr(addr));
7286 if (eLeft) {
7287 argL = argE;
7288 argR = argG;
7289 } else {
7290 argL = argG;
7291 argR = argE;
7294 if (op != Iop_INVALID) {
7295 vassert(hName == NULL);
7296 vassert(hAddr == NULL);
7297 assign(res, binop(op, argL, argR));
7298 } else {
7299 vassert(hName != NULL);
7300 vassert(hAddr != NULL);
7301 assign( res,
7302 mkIRExprCCall(
7303 Ity_I64,
7304 0/*regparms*/, hName, hAddr,
7305 mkIRExprVec_2( argL, argR )
7310 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
7312 DIP("%s%s %s, %s\n",
7313 name, show_granularity ? nameMMXGran(opc & 3) : "",
7314 ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ),
7315 nameMMXReg(gregLO3ofRM(modrm)) );
7317 return delta;
7321 /* Vector by scalar shift of G by the amount specified at the bottom
7322 of E. This is a straight copy of dis_SSE_shiftG_byE. */
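/* MMX PSLL/PSRL produce all zeroes when the 64-bit count is >= the
   lane width, while PSRA then behaves as a shift by (width - 1),
   replicating the sign bit. The CmpLT64U guards below reproduce this
   and also keep the IR shift amount in range. */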
7324 static ULong dis_MMX_shiftG_byE ( const VexAbiInfo* vbi,
7325 Prefix pfx, Long delta,
7326 const HChar* opname, IROp op )
7328 HChar dis_buf[50];
7329 Int alen, size;
7330 IRTemp addr;
7331 Bool shl, shr, sar;
7332 UChar rm = getUChar(delta);
7333 IRTemp g0 = newTemp(Ity_I64);
7334 IRTemp g1 = newTemp(Ity_I64);
7335 IRTemp amt = newTemp(Ity_I64);
7336 IRTemp amt8 = newTemp(Ity_I8);
7338 if (epartIsReg(rm)) {
7339 assign( amt, getMMXReg(eregLO3ofRM(rm)) );
7340 DIP("%s %s,%s\n", opname,
7341 nameMMXReg(eregLO3ofRM(rm)),
7342 nameMMXReg(gregLO3ofRM(rm)) );
7343 delta++;
7344 } else {
7345 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
7346 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
7347 DIP("%s %s,%s\n", opname,
7348 dis_buf,
7349 nameMMXReg(gregLO3ofRM(rm)) );
7350 delta += alen;
7352 assign( g0, getMMXReg(gregLO3ofRM(rm)) );
7353 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
7355 shl = shr = sar = False;
7356 size = 0;
7357 switch (op) {
7358 case Iop_ShlN16x4: shl = True; size = 16; break; /* lane width, as for ShrN16x4/SarN16x4 */
7359 case Iop_ShlN32x2: shl = True; size = 32; break;
7360 case Iop_Shl64: shl = True; size = 64; break;
7361 case Iop_ShrN16x4: shr = True; size = 16; break;
7362 case Iop_ShrN32x2: shr = True; size = 32; break;
7363 case Iop_Shr64: shr = True; size = 64; break;
7364 case Iop_SarN16x4: sar = True; size = 16; break;
7365 case Iop_SarN32x2: sar = True; size = 32; break;
7366 default: vassert(0);
7369 if (shl || shr) {
7370 assign(
7372 IRExpr_ITE(
7373 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
7374 binop(op, mkexpr(g0), mkexpr(amt8)),
7375 mkU64(0)
7378 } else
7379 if (sar) {
7380 assign(
7382 IRExpr_ITE(
7383 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
7384 binop(op, mkexpr(g0), mkexpr(amt8)),
7385 binop(op, mkexpr(g0), mkU8(size-1))
7388 } else {
7389 vassert(0);
7392 putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
7393 return delta;
7397 /* Vector by scalar shift of E by an immediate byte. This is a
7398 straight copy of dis_SSE_shiftE_imm. */
7400 static
7401 ULong dis_MMX_shiftE_imm ( Long delta, const HChar* opname, IROp op )
7403 Bool shl, shr, sar;
7404 UChar rm = getUChar(delta);
7405 IRTemp e0 = newTemp(Ity_I64);
7406 IRTemp e1 = newTemp(Ity_I64);
7407 UChar amt, size;
7408 vassert(epartIsReg(rm));
7409 vassert(gregLO3ofRM(rm) == 2
7410 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
7411 amt = getUChar(delta+1);
7412 delta += 2;
7413 DIP("%s $%d,%s\n", opname,
7414 (Int)amt,
7415 nameMMXReg(eregLO3ofRM(rm)) );
7417 assign( e0, getMMXReg(eregLO3ofRM(rm)) );
7419 shl = shr = sar = False;
7420 size = 0;
7421 switch (op) {
7422 case Iop_ShlN16x4: shl = True; size = 16; break;
7423 case Iop_ShlN32x2: shl = True; size = 32; break;
7424 case Iop_Shl64: shl = True; size = 64; break;
7425 case Iop_SarN16x4: sar = True; size = 16; break;
7426 case Iop_SarN32x2: sar = True; size = 32; break;
7427 case Iop_ShrN16x4: shr = True; size = 16; break;
7428 case Iop_ShrN32x2: shr = True; size = 32; break;
7429 case Iop_Shr64: shr = True; size = 64; break;
7430 default: vassert(0);
7433 if (shl || shr) {
7434 assign( e1, amt >= size
7435 ? mkU64(0)
7436 : binop(op, mkexpr(e0), mkU8(amt))
7438 } else
7439 if (sar) {
7440 assign( e1, amt >= size
7441 ? binop(op, mkexpr(e0), mkU8(size-1))
7442 : binop(op, mkexpr(e0), mkU8(amt))
7444 } else {
7445 vassert(0);
7448 putMMXReg( eregLO3ofRM(rm), mkexpr(e1) );
7449 return delta;
7453 /* Completely handle all MMX instructions except emms. */
7455 static
7456 ULong dis_MMX ( Bool* decode_ok,
7457 const VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta )
7459 Int len;
7460 UChar modrm;
7461 HChar dis_buf[50];
7462 UChar opc = getUChar(delta);
7463 delta++;
7465 /* dis_MMX handles all insns except emms. */
7466 do_MMX_preamble();
7468 switch (opc) {
7470 case 0x6E:
7471 if (sz == 4) {
7472 /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
7473 modrm = getUChar(delta);
7474 if (epartIsReg(modrm)) {
7475 delta++;
7476 putMMXReg(
7477 gregLO3ofRM(modrm),
7478 binop( Iop_32HLto64,
7479 mkU32(0),
7480 getIReg32(eregOfRexRM(pfx,modrm)) ) );
7481 DIP("movd %s, %s\n",
7482 nameIReg32(eregOfRexRM(pfx,modrm)),
7483 nameMMXReg(gregLO3ofRM(modrm)));
7484 } else {
7485 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7486 delta += len;
7487 putMMXReg(
7488 gregLO3ofRM(modrm),
7489 binop( Iop_32HLto64,
7490 mkU32(0),
7491 loadLE(Ity_I32, mkexpr(addr)) ) );
7492 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7495 else
7496 if (sz == 8) {
7497 /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
7498 modrm = getUChar(delta);
7499 if (epartIsReg(modrm)) {
7500 delta++;
7501 putMMXReg( gregLO3ofRM(modrm),
7502 getIReg64(eregOfRexRM(pfx,modrm)) );
7503 DIP("movd %s, %s\n",
7504 nameIReg64(eregOfRexRM(pfx,modrm)),
7505 nameMMXReg(gregLO3ofRM(modrm)));
7506 } else {
7507 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7508 delta += len;
7509 putMMXReg( gregLO3ofRM(modrm),
7510 loadLE(Ity_I64, mkexpr(addr)) );
7511 DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7514 else {
7515 goto mmx_decode_failure;
7517 break;
7519 case 0x7E:
7520 if (sz == 4) {
7521 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */
7522 modrm = getUChar(delta);
7523 if (epartIsReg(modrm)) {
7524 delta++;
7525 putIReg32( eregOfRexRM(pfx,modrm),
7526 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
7527 DIP("movd %s, %s\n",
7528 nameMMXReg(gregLO3ofRM(modrm)),
7529 nameIReg32(eregOfRexRM(pfx,modrm)));
7530 } else {
7531 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7532 delta += len;
7533 storeLE( mkexpr(addr),
7534 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
7535 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7538 else
7539 if (sz == 8) {
7540 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */
7541 modrm = getUChar(delta);
7542 if (epartIsReg(modrm)) {
7543 delta++;
7544 putIReg64( eregOfRexRM(pfx,modrm),
7545 getMMXReg(gregLO3ofRM(modrm)) );
7546 DIP("movd %s, %s\n",
7547 nameMMXReg(gregLO3ofRM(modrm)),
7548 nameIReg64(eregOfRexRM(pfx,modrm)));
7549 } else {
7550 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7551 delta += len;
7552 storeLE( mkexpr(addr),
7553 getMMXReg(gregLO3ofRM(modrm)) );
7554 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7556 } else {
7557 goto mmx_decode_failure;
7559 break;
7561 case 0x6F:
7562 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
7563 if (sz != 4
7564 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7565 goto mmx_decode_failure;
7566 modrm = getUChar(delta);
7567 if (epartIsReg(modrm)) {
7568 delta++;
7569 putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) );
7570 DIP("movq %s, %s\n",
7571 nameMMXReg(eregLO3ofRM(modrm)),
7572 nameMMXReg(gregLO3ofRM(modrm)));
7573 } else {
7574 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7575 delta += len;
7576 putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
7577 DIP("movq %s, %s\n",
7578 dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7580 break;
7582 case 0x7F:
7583 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
7584 if (sz != 4
7585 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7586 goto mmx_decode_failure;
7587 modrm = getUChar(delta);
7588 if (epartIsReg(modrm)) {
7589 delta++;
7590 putMMXReg( eregLO3ofRM(modrm), getMMXReg(gregLO3ofRM(modrm)) );
7591 DIP("movq %s, %s\n",
7592 nameMMXReg(gregLO3ofRM(modrm)),
7593 nameMMXReg(eregLO3ofRM(modrm)));
7594 } else {
7595 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7596 delta += len;
7597 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
7598 DIP("mov(nt)q %s, %s\n",
7599 nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7601 break;
7603 case 0xFC:
7604 case 0xFD:
7605 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
7606 if (sz != 4)
7607 goto mmx_decode_failure;
7608 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True );
7609 break;
7611 case 0xEC:
7612 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
7613 if (sz != 4
7614 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7615 goto mmx_decode_failure;
7616 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True );
7617 break;
7619 case 0xDC:
7620 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7621 if (sz != 4)
7622 goto mmx_decode_failure;
7623 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True );
7624 break;
7626 case 0xF8:
7627 case 0xF9:
7628 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
7629 if (sz != 4)
7630 goto mmx_decode_failure;
7631 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True );
7632 break;
7634 case 0xE8:
7635 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
7636 if (sz != 4)
7637 goto mmx_decode_failure;
7638 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True );
7639 break;
7641 case 0xD8:
7642 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7643 if (sz != 4)
7644 goto mmx_decode_failure;
7645 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True );
7646 break;
7648 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
7649 if (sz != 4)
7650 goto mmx_decode_failure;
7651 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False );
7652 break;
7654 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
7655 if (sz != 4)
7656 goto mmx_decode_failure;
7657 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False );
7658 break;
7660 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
7661 vassert(sz == 4);
7662 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False );
7663 break;
7665 case 0x74:
7666 case 0x75:
7667 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
7668 if (sz != 4)
7669 goto mmx_decode_failure;
7670 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True );
7671 break;
7673 case 0x64:
7674 case 0x65:
7675 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
7676 if (sz != 4)
7677 goto mmx_decode_failure;
7678 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True );
7679 break;
7681 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
7682 if (sz != 4)
7683 goto mmx_decode_failure;
7684 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False );
7685 break;
7687 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
7688 if (sz != 4)
7689 goto mmx_decode_failure;
7690 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False );
7691 break;
7693 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
7694 if (sz != 4)
7695 goto mmx_decode_failure;
7696 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packuswb", False );
7697 break;
7699 case 0x68:
7700 case 0x69:
7701 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
7702 if (sz != 4
7703 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7704 goto mmx_decode_failure;
7705 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True );
7706 break;
7708 case 0x60:
7709 case 0x61:
7710 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
7711 if (sz != 4
7712 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7713 goto mmx_decode_failure;
7714 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True );
7715 break;
7717 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
7718 if (sz != 4)
7719 goto mmx_decode_failure;
7720 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False );
7721 break;
7723 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
7724 if (sz != 4)
7725 goto mmx_decode_failure;
7726 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False );
7727 break;
7729 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
7730 if (sz != 4)
7731 goto mmx_decode_failure;
7732 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False );
7733 break;
7735 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
7736 if (sz != 4)
7737 goto mmx_decode_failure;
7738 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False );
7739 break;
7741 # define SHIFT_BY_REG(_name,_op) \
7742 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \
7743 break;
7745 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
7746 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
7747 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
7748 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
7750 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
7751 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
7752 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
7753 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
7755 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
7756 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
7757 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
7759 # undef SHIFT_BY_REG
7761 case 0x71:
7762 case 0x72:
7763 case 0x73: {
7764 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
7765 UChar byte2, subopc;
7766 if (sz != 4)
7767 goto mmx_decode_failure;
7768 byte2 = getUChar(delta); /* amode / sub-opcode */
7769 subopc = toUChar( (byte2 >> 3) & 7 );
7771 # define SHIFT_BY_IMM(_name,_op) \
7772 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
7773 } while (0)
7775 if (subopc == 2 /*SRL*/ && opc == 0x71)
7776 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
7777 else if (subopc == 2 /*SRL*/ && opc == 0x72)
7778 SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
7779 else if (subopc == 2 /*SRL*/ && opc == 0x73)
7780 SHIFT_BY_IMM("psrlq", Iop_Shr64);
7782 else if (subopc == 4 /*SAR*/ && opc == 0x71)
7783 SHIFT_BY_IMM("psraw", Iop_SarN16x4);
7784 else if (subopc == 4 /*SAR*/ && opc == 0x72)
7785 SHIFT_BY_IMM("psrad", Iop_SarN32x2);
7787 else if (subopc == 6 /*SHL*/ && opc == 0x71)
7788 SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
7789 else if (subopc == 6 /*SHL*/ && opc == 0x72)
7790 SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
7791 else if (subopc == 6 /*SHL*/ && opc == 0x73)
7792 SHIFT_BY_IMM("psllq", Iop_Shl64);
7794 else goto mmx_decode_failure;
7796 # undef SHIFT_BY_IMM
7797 break;
7800 case 0xF7: {
7801 IRTemp addr = newTemp(Ity_I64);
7802 IRTemp regD = newTemp(Ity_I64);
7803 IRTemp regM = newTemp(Ity_I64);
7804 IRTemp mask = newTemp(Ity_I64);
7805 IRTemp olddata = newTemp(Ity_I64);
7806 IRTemp newdata = newTemp(Ity_I64);
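/* MASKMOVQ: store to [RDI] only those bytes of the source register
   (regD) whose corresponding byte in the mask register (regM) has its
   top bit set. SarN8x8 by 7 widens each mask byte to 0x00 or 0xFF,
   and the store is modelled as a read-modify-write of all 8 bytes. */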
7808 modrm = getUChar(delta);
7809 if (sz != 4 || (!epartIsReg(modrm)))
7810 goto mmx_decode_failure;
7811 delta++;
7813 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
7814 assign( regM, getMMXReg( eregLO3ofRM(modrm) ));
7815 assign( regD, getMMXReg( gregLO3ofRM(modrm) ));
7816 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
7817 assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
7818 assign( newdata,
7819 binop(Iop_Or64,
7820 binop(Iop_And64,
7821 mkexpr(regD),
7822 mkexpr(mask) ),
7823 binop(Iop_And64,
7824 mkexpr(olddata),
7825 unop(Iop_Not64, mkexpr(mask)))) );
7826 storeLE( mkexpr(addr), mkexpr(newdata) );
7827 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ),
7828 nameMMXReg( gregLO3ofRM(modrm) ) );
7829 break;
7832 /* --- MMX decode failure --- */
7833 default:
7834 mmx_decode_failure:
7835 *decode_ok = False;
7836 return delta; /* ignored */
7840 *decode_ok = True;
7841 return delta;
7845 /*------------------------------------------------------------*/
7846 /*--- More misc arithmetic and other obscure insns. ---*/
7847 /*------------------------------------------------------------*/
7849 /* Generate base << amt with vacated places filled with stuff
7850 from xtra. amt guaranteed in 0 .. 63. */
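/* The amt == 0 case is split out because the general formula would
   require shifting xtra right by 64, which is outside the defined
   range for Iop_Shr64; when amt is 0 the result is simply base. */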
7851 static
7852 IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt )
7854 /* if amt == 0
7855 then base
7856 else (base << amt) | (xtra >>u (64-amt))
7858 return
7859 IRExpr_ITE(
7860 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
7861 binop(Iop_Or64,
7862 binop(Iop_Shl64, mkexpr(base), mkexpr(amt)),
7863 binop(Iop_Shr64, mkexpr(xtra),
7864 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
7866 mkexpr(base)
7870 /* Generate base >>u amt with vacated places filled with stuff
7871 from xtra. amt guaranteed in 0 .. 63. */
7872 static
7873 IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt )
7875 /* if amt == 0
7876 then base
7877 else (base >>u amt) | (xtra << (64-amt))
7879 return
7880 IRExpr_ITE(
7881 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
7882 binop(Iop_Or64,
7883 binop(Iop_Shr64, mkexpr(base), mkexpr(amt)),
7884 binop(Iop_Shl64, mkexpr(xtra),
7885 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
7887 mkexpr(base)
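/* As an illustration of what the two helpers above compute, here is a
   minimal standalone C model (reference only; the decoder itself uses
   the IR expressions above, and ref_shiftL64_with_extras is just an
   illustrative name):

      static ULong ref_shiftL64_with_extras ( ULong base, ULong xtra,
                                              UInt amt )   // amt in 0 .. 63
      {
         // amt == 0 is special-cased, as in the IR, because
         // xtra >> (64 - 0) would be an out-of-range shift.
         return amt == 0 ? base
                         : (base << amt) | (xtra >> (64 - amt));
      }

   For example base = 0x1, xtra = 0x8000000000000000, amt = 4 gives
   (0x1 << 4) | (xtra >> 60) = 0x10 | 0x8 = 0x18.  shiftR64_with_extras
   is the mirror image, filling the vacated high bits from xtra. */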
7891 /* Double length left and right shifts. Apparently only required in
7892 v-size (no b- variant). */
7893 static
7894 ULong dis_SHLRD_Gv_Ev ( const VexAbiInfo* vbi,
7895 Prefix pfx,
7896 Long delta, UChar modrm,
7897 Int sz,
7898 IRExpr* shift_amt,
7899 Bool amt_is_literal,
7900 const HChar* shift_amt_txt,
7901 Bool left_shift )
7903 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
7904 for printing it. And delta on entry points at the modrm byte. */
7905 Int len;
7906 HChar dis_buf[50];
7908 IRType ty = szToITy(sz);
7909 IRTemp gsrc = newTemp(ty);
7910 IRTemp esrc = newTemp(ty);
7911 IRTemp addr = IRTemp_INVALID;
7912 IRTemp tmpSH = newTemp(Ity_I8);
7913 IRTemp tmpSS = newTemp(Ity_I8);
7914 IRTemp tmp64 = IRTemp_INVALID;
7915 IRTemp res64 = IRTemp_INVALID;
7916 IRTemp rss64 = IRTemp_INVALID;
7917 IRTemp resTy = IRTemp_INVALID;
7918 IRTemp rssTy = IRTemp_INVALID;
7919 Int mask = sz==8 ? 63 : 31;
7921 vassert(sz == 2 || sz == 4 || sz == 8);
7923 /* The E-part is the destination; this is shifted. The G-part
7924 supplies bits to be shifted into the E-part, but is not
7925 changed.
7927 If shifting left, form a double-length word with E at the top
7928 and G at the bottom, and shift this left. The result is then in
7929 the high part.
7931 If shifting right, form a double-length word with G at the top
7932 and E at the bottom, and shift this right. The result is then
7933 at the bottom. */
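/* Worked example for the 32-bit left-shift case: with E = 0x11111111,
   G = 0xFFFFFFFF and a shift amount of 8, the double-length word is
   0x11111111FFFFFFFF; shifting it left by 8 and keeping the top 32
   bits gives 0x111111FF, i.e. E shifted left by 8 with the top 8 bits
   of G shifted in at the bottom -- exactly the shld result. */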
7935 /* Fetch the operands. */
7937 assign( gsrc, getIRegG(sz, pfx, modrm) );
7939 if (epartIsReg(modrm)) {
7940 delta++;
7941 assign( esrc, getIRegE(sz, pfx, modrm) );
7942 DIP("sh%cd%c %s, %s, %s\n",
7943 ( left_shift ? 'l' : 'r' ), nameISize(sz),
7944 shift_amt_txt,
7945 nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm));
7946 } else {
7947 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
7948 /* # bytes following amode */
7949 amt_is_literal ? 1 : 0 );
7950 delta += len;
7951 assign( esrc, loadLE(ty, mkexpr(addr)) );
7952 DIP("sh%cd%c %s, %s, %s\n",
7953 ( left_shift ? 'l' : 'r' ), nameISize(sz),
7954 shift_amt_txt,
7955 nameIRegG(sz, pfx, modrm), dis_buf);
7958 /* Calculate the masked shift amount (tmpSH), the masked subshift
7959 amount (tmpSS), the shifted value (res64) and the subshifted
7960 value (rss64). */
7962 assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) );
7963 assign( tmpSS, binop(Iop_And8,
7964 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
7965 mkU8(mask)));
7967 tmp64 = newTemp(Ity_I64);
7968 res64 = newTemp(Ity_I64);
7969 rss64 = newTemp(Ity_I64);
7971 if (sz == 2 || sz == 4) {
7973 /* G is xtra; E is data */
7974 /* what a freaking nightmare: */
7975 if (sz == 4 && left_shift) {
7976 assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) );
7977 assign( res64,
7978 binop(Iop_Shr64,
7979 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
7980 mkU8(32)) );
7981 assign( rss64,
7982 binop(Iop_Shr64,
7983 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)),
7984 mkU8(32)) );
7986 else
7987 if (sz == 4 && !left_shift) {
7988 assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) );
7989 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
7990 assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) );
7992 else
7993 if (sz == 2 && left_shift) {
7994 assign( tmp64,
7995 binop(Iop_32HLto64,
7996 binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)),
7997 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc))
7999 /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
8000 assign( res64,
8001 binop(Iop_Shr64,
8002 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
8003 mkU8(48)) );
8004 /* subshift formed by shifting [esrc'0000'0000'0000] */
8005 assign( rss64,
8006 binop(Iop_Shr64,
8007 binop(Iop_Shl64,
8008 binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)),
8009 mkU8(48)),
8010 mkexpr(tmpSS)),
8011 mkU8(48)) );
8013 else
8014 if (sz == 2 && !left_shift) {
8015 assign( tmp64,
8016 binop(Iop_32HLto64,
8017 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)),
8018 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc))
8020 /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
8021 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
8022 /* subshift formed by shifting [0000'0000'0000'esrc] */
8023 assign( rss64, binop(Iop_Shr64,
8024 unop(Iop_16Uto64, mkexpr(esrc)),
8025 mkexpr(tmpSS)) );
8028 } else {
8030 vassert(sz == 8);
8031 if (left_shift) {
8032 assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH ));
8033 assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS ));
8034 } else {
8035 assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH ));
8036 assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS ));
8041 resTy = newTemp(ty);
8042 rssTy = newTemp(ty);
8043 assign( resTy, narrowTo(ty, mkexpr(res64)) );
8044 assign( rssTy, narrowTo(ty, mkexpr(rss64)) );
8046 /* Put result back and write the flags thunk. */
8047 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64,
8048 resTy, rssTy, ty, tmpSH );
8050 if (epartIsReg(modrm)) {
8051 putIRegE(sz, pfx, modrm, mkexpr(resTy));
8052 } else {
8053 storeLE( mkexpr(addr), mkexpr(resTy) );
8056 if (amt_is_literal) delta++;
8057 return delta;
8061 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
8062 required. */
8064 typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;
8066 static const HChar* nameBtOp ( BtOp op )
8068 switch (op) {
8069 case BtOpNone: return "";
8070 case BtOpSet: return "s";
8071 case BtOpReset: return "r";
8072 case BtOpComp: return "c";
8073 default: vpanic("nameBtOp(amd64)");
8078 static
8079 ULong dis_bt_G_E ( const VexAbiInfo* vbi,
8080 Prefix pfx, Int sz, Long delta, BtOp op,
8081 /*OUT*/Bool* decode_OK )
8083 HChar dis_buf[50];
8084 UChar modrm;
8085 Int len;
8086 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
8087 t_addr1, t_rsp, t_mask, t_new;
8089 vassert(sz == 2 || sz == 4 || sz == 8);
8091 t_fetched = t_bitno0 = t_bitno1 = t_bitno2
8092 = t_addr0 = t_addr1 = t_rsp
8093 = t_mask = t_new = IRTemp_INVALID;
8095 t_fetched = newTemp(Ity_I8);
8096 t_new = newTemp(Ity_I8);
8097 t_bitno0 = newTemp(Ity_I64);
8098 t_bitno1 = newTemp(Ity_I64);
8099 t_bitno2 = newTemp(Ity_I8);
8100 t_addr1 = newTemp(Ity_I64);
8101 modrm = getUChar(delta);
8103 *decode_OK = True;
8104 if (epartIsReg(modrm)) {
8105 /* F2 and F3 are never acceptable. */
8106 if (haveF2orF3(pfx)) {
8107 *decode_OK = False;
8108 return delta;
8110 } else {
8111 /* F2 or F3 (but not both) are allowed, provided LOCK is also
8112 present, and only for the BTC/BTS/BTR cases (not BT). */
8113 if (haveF2orF3(pfx)) {
8114 if (haveF2andF3(pfx) || !haveLOCK(pfx) || op == BtOpNone) {
8115 *decode_OK = False;
8116 return delta;
8121 assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) );
8123 if (epartIsReg(modrm)) {
8124 delta++;
8125 /* Get it onto the client's stack. Oh, this is a horrible
8126 kludge. See https://bugs.kde.org/show_bug.cgi?id=245925.
8127 Because of the ELF ABI stack redzone, there may be live data
8128 up to 128 bytes below %RSP. So we can't just push it on the
8129 stack, else we may wind up trashing live data, and causing
8130 impossible-to-find simulation errors. (Yes, this did
8131 happen.) So we need to drop RSP by at least 128 before
8132 pushing it. That unfortunately means hitting Memcheck's
8133 fast-case painting code. Ideally we should drop more than
8134 128, to reduce the chances of breaking buggy programs that
8135 have live data below -128(%RSP). Memcheck fast-cases moves
8136 of 288 bytes due to the need to handle ppc64-linux quickly,
8137 so let's use 288. Of course the real fix is to get rid of
8138 this kludge entirely. */
8139 t_rsp = newTemp(Ity_I64);
8140 t_addr0 = newTemp(Ity_I64);
8142 vassert(vbi->guest_stack_redzone_size == 128);
8143 assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) );
8144 putIReg64(R_RSP, mkexpr(t_rsp));
8146 storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) );
8148 /* Make t_addr0 point at it. */
8149 assign( t_addr0, mkexpr(t_rsp) );
8151 /* Mask out upper bits of the shift amount, since we're doing a
8152 reg. */
8153 assign( t_bitno1, binop(Iop_And64,
8154 mkexpr(t_bitno0),
8155 mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) );
8157 } else {
8158 t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
8159 delta += len;
8160 assign( t_bitno1, mkexpr(t_bitno0) );
8163 /* At this point: t_addr0 is the address being operated on. If it
8164 was a reg, we will have pushed it onto the client's stack.
8165 t_bitno1 is the bit number, suitably masked in the case of a
8166 reg. */
8168 /* Now the main sequence. */
8169 assign( t_addr1,
8170 binop(Iop_Add64,
8171 mkexpr(t_addr0),
8172 binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) );
8174 /* t_addr1 now holds effective address */
8176 assign( t_bitno2,
8177 unop(Iop_64to8,
8178 binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) );
8180 /* t_bitno2 contains offset of bit within byte */
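   /* Worked example of the byte/bit split above: a bit offset of 35
      selects byte 35>>3 = 4 past t_addr0 and bit 35&7 = 3 within that
      byte.  The shift is arithmetic (Iop_Sar64) because, for memory
      operands, the bit offset is a signed quantity: an offset of -1
      must resolve to the byte just below the base address, bit 7,
      which is what Sar64(-1,3) = -1 and (-1 & 7) = 7 give. */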
8182 if (op != BtOpNone) {
8183 t_mask = newTemp(Ity_I8);
8184 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
8187 /* t_mask is now a suitable byte mask */
8189 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );
8191 if (op != BtOpNone) {
8192 switch (op) {
8193 case BtOpSet:
8194 assign( t_new,
8195 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
8196 break;
8197 case BtOpComp:
8198 assign( t_new,
8199 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
8200 break;
8201 case BtOpReset:
8202 assign( t_new,
8203 binop(Iop_And8, mkexpr(t_fetched),
8204 unop(Iop_Not8, mkexpr(t_mask))) );
8205 break;
8206 default:
8207 vpanic("dis_bt_G_E(amd64)");
8209 if ((haveLOCK(pfx)) && !epartIsReg(modrm)) {
8210 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
8211 mkexpr(t_new)/*new*/,
8212 guest_RIP_curr_instr );
8213 } else {
8214 storeLE( mkexpr(t_addr1), mkexpr(t_new) );
8218 /* Side effect done; now get selected bit into Carry flag. The Intel docs
8219 (as of 2015, at least) say that C holds the result, Z is unchanged, and
8220 O,S,A and P are undefined. However, on Skylake it appears that O,S,A,P
8221 are also unchanged, so let's do that. */
8222 const ULong maskC = AMD64G_CC_MASK_C;
8223 const ULong maskOSZAP = AMD64G_CC_MASK_O | AMD64G_CC_MASK_S
8224 | AMD64G_CC_MASK_Z | AMD64G_CC_MASK_A
8225 | AMD64G_CC_MASK_P;
8227 IRTemp old_rflags = newTemp(Ity_I64);
8228 assign(old_rflags, mk_amd64g_calculate_rflags_all());
8230 IRTemp new_rflags = newTemp(Ity_I64);
8231 assign(new_rflags,
8232 binop(Iop_Or64,
8233 binop(Iop_And64, mkexpr(old_rflags), mkU64(maskOSZAP)),
8234 binop(Iop_And64,
8235 binop(Iop_Shr64,
8236 unop(Iop_8Uto64, mkexpr(t_fetched)),
8237 mkexpr(t_bitno2)),
8238 mkU64(maskC))));
8240 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8241 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8242 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
8243 /* Set NDEP even though it isn't used. This makes redundant-PUT
8244 elimination of previous stores to this field work better. */
8245 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8247 /* Move reg operand from stack back to reg */
8248 if (epartIsReg(modrm)) {
8249 /* t_rsp still points at it. */
8250 /* only write the reg if actually modifying it; doing otherwise
8251 zeroes the top half erroneously when doing btl due to
8252 standard zero-extend rule */
8253 if (op != BtOpNone)
8254 putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) );
8255 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) );
8258 DIP("bt%s%c %s, %s\n",
8259 nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm),
8260 ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) );
8262 return delta;
8267 /* Handle BSF/BSR. Only v-size seems necessary. */
8268 static
8269 ULong dis_bs_E_G ( const VexAbiInfo* vbi,
8270 Prefix pfx, Int sz, Long delta, Bool fwds )
8272 Bool isReg;
8273 UChar modrm;
8274 HChar dis_buf[50];
8276 IRType ty = szToITy(sz);
8277 IRTemp src = newTemp(ty);
8278 IRTemp dst = newTemp(ty);
8279 IRTemp src64 = newTemp(Ity_I64);
8280 IRTemp dst64 = newTemp(Ity_I64);
8281 IRTemp srcB = newTemp(Ity_I1);
8283 vassert(sz == 8 || sz == 4 || sz == 2);
8285 modrm = getUChar(delta);
8286 isReg = epartIsReg(modrm);
8287 if (isReg) {
8288 delta++;
8289 assign( src, getIRegE(sz, pfx, modrm) );
8290 } else {
8291 Int len;
8292 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
8293 delta += len;
8294 assign( src, loadLE(ty, mkexpr(addr)) );
8297 DIP("bs%c%c %s, %s\n",
8298 fwds ? 'f' : 'r', nameISize(sz),
8299 ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ),
8300 nameIRegG(sz, pfx, modrm));
8302 /* First, widen src to 64 bits if it is not already. */
8303 assign( src64, widenUto64(mkexpr(src)) );
8305 /* Generate a bool expression which is zero iff the original is
8306 zero, and nonzero otherwise. Ask for a CmpNE version which, if
8307 instrumented by Memcheck, is instrumented expensively, since
8308 this may be used on the output of a preceding movmskb insn,
8309 which has been known to be partially defined, and in need of
8310 careful handling. */
8311 assign( srcB, binop(Iop_ExpCmpNE64, mkexpr(src64), mkU64(0)) );
8313 /* Flags: Z is 1 iff source value is zero. All others
8314 are undefined -- we force them to zero. */
8315 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8316 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8317 stmt( IRStmt_Put(
8318 OFFB_CC_DEP1,
8319 IRExpr_ITE( mkexpr(srcB),
8320 /* src!=0 */
8321 mkU64(0),
8322 /* src==0 */
8323 mkU64(AMD64G_CC_MASK_Z)
8326 /* Set NDEP even though it isn't used. This makes redundant-PUT
8327 elimination of previous stores to this field work better. */
8328 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8330 /* Result: iff source value is zero, we can't use
8331 Iop_Clz64/Iop_Ctz64 as they have no defined result in that case.
8332 But anyway, amd64 semantics say the result is undefined in
8333 such situations. Hence handle the zero case specially. */
8335 /* Bleh. What we compute:
8337 bsf64: if src == 0 then {dst is unchanged}
8338 else Ctz64(src)
8340 bsr64: if src == 0 then {dst is unchanged}
8341 else 63 - Clz64(src)
8343 bsf32: if src == 0 then {dst is unchanged}
8344 else Ctz64(32Uto64(src))
8346 bsr32: if src == 0 then {dst is unchanged}
8347 else 63 - Clz64(32Uto64(src))
8349 bsf16: if src == 0 then {dst is unchanged}
8350 else Ctz64(32Uto64(16Uto32(src)))
8352 bsr16: if src == 0 then {dst is unchanged}
8353 else 63 - Clz64(32Uto64(16Uto32(src)))
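      Worked example: for src = 0x90 (bits 4 and 7 set), bsf computes
      Ctz64(0x90) = 4 and bsr computes 63 - Clz64(0x90) = 63 - 56 = 7,
      i.e. the indices of the lowest and highest set bits respectively.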
8356 /* The main computation, guarding against zero. */
8357 assign( dst64,
8358 IRExpr_ITE(
8359 mkexpr(srcB),
8360 /* src != 0 */
8361 fwds ? unop(Iop_Ctz64, mkexpr(src64))
8362 : binop(Iop_Sub64,
8363 mkU64(63),
8364 unop(Iop_Clz64, mkexpr(src64))),
8365 /* src == 0 -- leave dst unchanged */
8366 widenUto64( getIRegG( sz, pfx, modrm ) )
8370 if (sz == 2)
8371 assign( dst, unop(Iop_64to16, mkexpr(dst64)) );
8372 else
8373 if (sz == 4)
8374 assign( dst, unop(Iop_64to32, mkexpr(dst64)) );
8375 else
8376 assign( dst, mkexpr(dst64) );
8378 /* dump result back */
8379 putIRegG( sz, pfx, modrm, mkexpr(dst) );
8381 return delta;
8385 /* swap rAX with the reg specified by reg and REX.B */
8386 static
8387 void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 )
8389 IRType ty = szToITy(sz);
8390 IRTemp t1 = newTemp(ty);
8391 IRTemp t2 = newTemp(ty);
8392 vassert(sz == 2 || sz == 4 || sz == 8);
8393 vassert(regLo3 < 8);
8394 if (sz == 8) {
8395 assign( t1, getIReg64(R_RAX) );
8396 assign( t2, getIRegRexB(8, pfx, regLo3) );
8397 putIReg64( R_RAX, mkexpr(t2) );
8398 putIRegRexB(8, pfx, regLo3, mkexpr(t1) );
8399 } else if (sz == 4) {
8400 assign( t1, getIReg32(R_RAX) );
8401 assign( t2, getIRegRexB(4, pfx, regLo3) );
8402 putIReg32( R_RAX, mkexpr(t2) );
8403 putIRegRexB(4, pfx, regLo3, mkexpr(t1) );
8404 } else {
8405 assign( t1, getIReg16(R_RAX) );
8406 assign( t2, getIRegRexB(2, pfx, regLo3) );
8407 putIReg16( R_RAX, mkexpr(t2) );
8408 putIRegRexB(2, pfx, regLo3, mkexpr(t1) );
8410 DIP("xchg%c %s, %s\n",
8411 nameISize(sz), nameIRegRAX(sz),
8412 nameIRegRexB(sz,pfx, regLo3));
8416 static
8417 void codegen_SAHF ( void )
8419 /* Set the flags to:
8420 (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O)
8421 -- retain the old O flag
8422 | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8423 |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C)
8425 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8426 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
8427 IRTemp oldflags = newTemp(Ity_I64);
8428 assign( oldflags, mk_amd64g_calculate_rflags_all() );
8429 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8430 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8431 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8432 stmt( IRStmt_Put( OFFB_CC_DEP1,
8433 binop(Iop_Or64,
8434 binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)),
8435 binop(Iop_And64,
8436 binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)),
8437 mkU64(mask_SZACP))
8443 static
8444 void codegen_LAHF ( void )
8446 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
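   /* For example, if ZF and CF are set and the rest are clear, the
      byte written to AH is 0x43 = bit 6 (ZF) | bit 1 (always 1)
      | bit 0 (CF).  Bits 5 and 3 are always zero, and bit 1 is forced
      to one by the mkU64(1<<1) term below, matching the fixed bits of
      the architectural flags layout. */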
8447 IRExpr* rax_with_hole;
8448 IRExpr* new_byte;
8449 IRExpr* new_rax;
8450 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8451 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
8453 IRTemp flags = newTemp(Ity_I64);
8454 assign( flags, mk_amd64g_calculate_rflags_all() );
8456 rax_with_hole
8457 = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL));
8458 new_byte
8459 = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)),
8460 mkU64(1<<1));
8461 new_rax
8462 = binop(Iop_Or64, rax_with_hole,
8463 binop(Iop_Shl64, new_byte, mkU8(8)));
8464 putIReg64(R_RAX, new_rax);
8468 static
8469 ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok,
8470 const VexAbiInfo* vbi,
8471 Prefix pfx,
8472 Int size,
8473 Long delta0 )
8475 HChar dis_buf[50];
8476 Int len;
8478 IRType ty = szToITy(size);
8479 IRTemp acc = newTemp(ty);
8480 IRTemp src = newTemp(ty);
8481 IRTemp dest = newTemp(ty);
8482 IRTemp dest2 = newTemp(ty);
8483 IRTemp acc2 = newTemp(ty);
8484 IRTemp cond = newTemp(Ity_I1);
8485 IRTemp addr = IRTemp_INVALID;
8486 UChar rm = getUChar(delta0);
8488 /* There are 3 cases to consider:
8490 reg-reg: ignore any lock prefix, generate sequence based
8491 on ITE
8493 reg-mem, not locked: ignore any lock prefix, generate sequence
8494 based on ITE
8496 reg-mem, locked: use IRCAS
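      In all three cases the instruction itself behaves as (a rough
      reference model, ignoring widths and atomicity):

         if (rAX == dst) { ZF = 1; dst = src; }
         else            { ZF = 0; rAX = dst; }

      with the remaining arithmetic flags set from the comparison
      rAX - dst; that comparison is what the setFlags_DEP1_DEP2 calls
      below express.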
8499 /* Decide whether F2 or F3 are acceptable. Never for register
8500 case, but for the memory case, one or the other is OK provided
8501 LOCK is also present. */
8502 if (epartIsReg(rm)) {
8503 if (haveF2orF3(pfx)) {
8504 *ok = False;
8505 return delta0;
8507 } else {
8508 if (haveF2orF3(pfx)) {
8509 if (haveF2andF3(pfx) || !haveLOCK(pfx)) {
8510 *ok = False;
8511 return delta0;
8516 if (epartIsReg(rm)) {
8517 /* case 1 */
8518 assign( dest, getIRegE(size, pfx, rm) );
8519 delta0++;
8520 assign( src, getIRegG(size, pfx, rm) );
8521 assign( acc, getIRegRAX(size) );
8522 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
8523 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
8524 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
8525 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
8526 putIRegRAX(size, mkexpr(acc2));
8527 putIRegE(size, pfx, rm, mkexpr(dest2));
8528 DIP("cmpxchg%c %s,%s\n", nameISize(size),
8529 nameIRegG(size,pfx,rm),
8530 nameIRegE(size,pfx,rm) );
8532 else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
8533 /* case 2 */
8534 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8535 assign( dest, loadLE(ty, mkexpr(addr)) );
8536 delta0 += len;
8537 assign( src, getIRegG(size, pfx, rm) );
8538 assign( acc, getIRegRAX(size) );
8539 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
8540 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
8541 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
8542 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
8543 putIRegRAX(size, mkexpr(acc2));
8544 storeLE( mkexpr(addr), mkexpr(dest2) );
8545 DIP("cmpxchg%c %s,%s\n", nameISize(size),
8546 nameIRegG(size,pfx,rm), dis_buf);
8548 else if (!epartIsReg(rm) && haveLOCK(pfx)) {
8549 /* case 3 */
8550 /* src is new value. acc is expected value. dest is old value.
8551 Compute success from the output of the IRCAS, and steer the
8552 new value for RAX accordingly: in case of success, RAX is
8553 unchanged. */
8554 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8555 delta0 += len;
8556 assign( src, getIRegG(size, pfx, rm) );
8557 assign( acc, getIRegRAX(size) );
8558 stmt( IRStmt_CAS(
8559 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
8560 NULL, mkexpr(acc), NULL, mkexpr(src) )
8562 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
8563 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
8564 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
8565 putIRegRAX(size, mkexpr(acc2));
8566 DIP("cmpxchg%c %s,%s\n", nameISize(size),
8567 nameIRegG(size,pfx,rm), dis_buf);
8569 else vassert(0);
8571 *ok = True;
8572 return delta0;
8576 /* Handle conditional move instructions of the form
8577 cmovcc E(reg-or-mem), G(reg)
8579 E(src) is reg-or-mem
8580 G(dst) is reg.
8582 If E is reg, --> GET %E, tmps
8583 GET %G, tmpd
8584 CMOVcc tmps, tmpd
8585 PUT tmpd, %G
8587 If E is mem --> (getAddr E) -> tmpa
8588 LD (tmpa), tmps
8589 GET %G, tmpd
8590 CMOVcc tmps, tmpd
8591 PUT tmpd, %G
8593 static
8594 ULong dis_cmov_E_G ( const VexAbiInfo* vbi,
8595 Prefix pfx,
8596 Int sz,
8597 AMD64Condcode cond,
8598 Long delta0 )
8600 UChar rm = getUChar(delta0);
8601 HChar dis_buf[50];
8602 Int len;
8604 IRType ty = szToITy(sz);
8605 IRTemp tmps = newTemp(ty);
8606 IRTemp tmpd = newTemp(ty);
8608 if (epartIsReg(rm)) {
8609 assign( tmps, getIRegE(sz, pfx, rm) );
8610 assign( tmpd, getIRegG(sz, pfx, rm) );
8612 putIRegG( sz, pfx, rm,
8613 IRExpr_ITE( mk_amd64g_calculate_condition(cond),
8614 mkexpr(tmps),
8615 mkexpr(tmpd) )
8617 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
8618 nameIRegE(sz,pfx,rm),
8619 nameIRegG(sz,pfx,rm));
8620 return 1+delta0;
8623 /* E refers to memory */
8625 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8626 assign( tmps, loadLE(ty, mkexpr(addr)) );
8627 assign( tmpd, getIRegG(sz, pfx, rm) );
8629 putIRegG( sz, pfx, rm,
8630 IRExpr_ITE( mk_amd64g_calculate_condition(cond),
8631 mkexpr(tmps),
8632 mkexpr(tmpd) )
8635 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
8636 dis_buf,
8637 nameIRegG(sz,pfx,rm));
8638 return len+delta0;
8643 static
8644 ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok,
8645 const VexAbiInfo* vbi,
8646 Prefix pfx, Int sz, Long delta0 )
8648 Int len;
8649 UChar rm = getUChar(delta0);
8650 HChar dis_buf[50];
8652 IRType ty = szToITy(sz);
8653 IRTemp tmpd = newTemp(ty);
8654 IRTemp tmpt0 = newTemp(ty);
8655 IRTemp tmpt1 = newTemp(ty);
8657 /* There are 3 cases to consider:
8659 reg-reg: ignore any lock prefix,
8660 generate 'naive' (non-atomic) sequence
8662 reg-mem, not locked: ignore any lock prefix, generate 'naive'
8663 (non-atomic) sequence
8665 reg-mem, locked: use IRCAS
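      In all three cases the underlying operation is the same (a rough
      reference model): tmp = dst; dst = dst + src; src = tmp; with the
      arithmetic flags set from the addition.  Only the way the
      read-modify-write of dst is expressed (plain load/store versus
      CAS) differs between the cases.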
8668 if (epartIsReg(rm)) {
8669 /* case 1 */
8670 assign( tmpd, getIRegE(sz, pfx, rm) );
8671 assign( tmpt0, getIRegG(sz, pfx, rm) );
8672 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8673 mkexpr(tmpd), mkexpr(tmpt0)) );
8674 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8675 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8676 putIRegE(sz, pfx, rm, mkexpr(tmpt1));
8677 DIP("xadd%c %s, %s\n",
8678 nameISize(sz), nameIRegG(sz,pfx,rm), nameIRegE(sz,pfx,rm));
8679 *decode_ok = True;
8680 return 1+delta0;
8682 else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
8683 /* case 2 */
8684 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8685 assign( tmpd, loadLE(ty, mkexpr(addr)) );
8686 assign( tmpt0, getIRegG(sz, pfx, rm) );
8687 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8688 mkexpr(tmpd), mkexpr(tmpt0)) );
8689 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8690 storeLE( mkexpr(addr), mkexpr(tmpt1) );
8691 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8692 DIP("xadd%c %s, %s\n",
8693 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
8694 *decode_ok = True;
8695 return len+delta0;
8697 else if (!epartIsReg(rm) && haveLOCK(pfx)) {
8698 /* case 3 */
8699 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8700 assign( tmpd, loadLE(ty, mkexpr(addr)) );
8701 assign( tmpt0, getIRegG(sz, pfx, rm) );
8702 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8703 mkexpr(tmpd), mkexpr(tmpt0)) );
8704 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
8705 mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr );
8706 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8707 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8708 DIP("xadd%c %s, %s\n",
8709 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
8710 *decode_ok = True;
8711 return len+delta0;
8713 /*UNREACHED*/
8714 vassert(0);
8717 //.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
8718 //..
8719 //.. static
8720 //.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 )
8721 //.. {
8722 //.. Int len;
8723 //.. IRTemp addr;
8724 //.. UChar rm = getUChar(delta0);
8725 //.. HChar dis_buf[50];
8726 //..
8727 //.. if (epartIsReg(rm)) {
8728 //.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
8729 //.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
8730 //.. return 1+delta0;
8731 //.. } else {
8732 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8733 //.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
8734 //.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
8735 //.. return len+delta0;
8736 //.. }
8737 //.. }
8738 //..
8739 //.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
8740 //.. dst is ireg and sz==4, zero out top half of it. */
8741 //..
8742 //.. static
8743 //.. UInt dis_mov_Sw_Ew ( UChar sorb,
8744 //.. Int sz,
8745 //.. UInt delta0 )
8746 //.. {
8747 //.. Int len;
8748 //.. IRTemp addr;
8749 //.. UChar rm = getUChar(delta0);
8750 //.. HChar dis_buf[50];
8751 //..
8752 //.. vassert(sz == 2 || sz == 4);
8753 //..
8754 //.. if (epartIsReg(rm)) {
8755 //.. if (sz == 4)
8756 //.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
8757 //.. else
8758 //.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
8759 //..
8760 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
8761 //.. return 1+delta0;
8762 //.. } else {
8763 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8764 //.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
8765 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
8766 //.. return len+delta0;
8767 //.. }
8768 //.. }
8770 /* Handle move instructions of the form
8771 mov S, E meaning
8772 mov sreg, reg-or-mem
8773 Is passed a ptr to the modRM byte, and the data size. Returns
8774 the address advanced completely over this instruction.
8776 VEX does not currently simulate segment registers on AMD64 which means that
8777 instead of moving the value of the segment register, zero is moved to the
8778 destination. The zero value represents a null (unused) selector. This is
8779 not correct (especially for the %cs, %fs and %gs registers) but it seems to
8780 provide a sufficient simulation for currently seen programs that use this
8781 instruction. If some program actually decides to use the obtained segment
8782 selector for something meaningful then the zero value should be a clear
8783 indicator that there is some problem.
8785 S(src) is sreg.
8786 E(dst) is reg-or-mem
8788 If E is reg, --> PUT $0, %E
8790 If E is mem, --> (getAddr E) -> tmpa
8791 ST $0, (tmpa)
8793 static
8794 ULong dis_mov_S_E ( const VexAbiInfo* vbi,
8795 Prefix pfx,
8796 Int size,
8797 Long delta0 )
8799 Int len;
8800 UChar rm = getUChar(delta0);
8801 HChar dis_buf[50];
8803 if (epartIsReg(rm)) {
8804 putIRegE(size, pfx, rm, mkU(szToITy(size), 0));
8805 DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx, rm)),
8806 nameIRegE(size, pfx, rm));
8807 return 1+delta0;
8810 /* E refers to memory */
8812 IRTemp addr = disAMode(&len, vbi, pfx, delta0, dis_buf, 0);
8813 storeLE(mkexpr(addr), mkU16(0));
8814 DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx, rm)),
8815 dis_buf);
8816 return len+delta0;
8820 //.. static
8821 //.. void dis_push_segreg ( UInt sreg, Int sz )
8822 //.. {
8823 //.. IRTemp t1 = newTemp(Ity_I16);
8824 //.. IRTemp ta = newTemp(Ity_I32);
8825 //.. vassert(sz == 2 || sz == 4);
8826 //..
8827 //.. assign( t1, getSReg(sreg) );
8828 //.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
8829 //.. putIReg(4, R_ESP, mkexpr(ta));
8830 //.. storeLE( mkexpr(ta), mkexpr(t1) );
8831 //..
8832 //.. DIP("pushw %s\n", nameSReg(sreg));
8833 //.. }
8834 //..
8835 //.. static
8836 //.. void dis_pop_segreg ( UInt sreg, Int sz )
8837 //.. {
8838 //.. IRTemp t1 = newTemp(Ity_I16);
8839 //.. IRTemp ta = newTemp(Ity_I32);
8840 //.. vassert(sz == 2 || sz == 4);
8841 //..
8842 //.. assign( ta, getIReg(4, R_ESP) );
8843 //.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
8844 //..
8845 //.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
8846 //.. putSReg( sreg, mkexpr(t1) );
8847 //.. DIP("pop %s\n", nameSReg(sreg));
8848 //.. }
8850 static
8851 void dis_ret ( /*MOD*/DisResult* dres, const VexAbiInfo* vbi, ULong d64 )
8853 IRTemp t1 = newTemp(Ity_I64);
8854 IRTemp t2 = newTemp(Ity_I64);
8855 IRTemp t3 = newTemp(Ity_I64);
8856 assign(t1, getIReg64(R_RSP));
8857 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
8858 assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64)));
8859 putIReg64(R_RSP, mkexpr(t3));
8860 make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret");
8861 jmp_treg(dres, Ijk_Ret, t2);
8862 vassert(dres->whatNext == Dis_StopHere);
8866 /*------------------------------------------------------------*/
8867 /*--- SSE/SSE2/SSE3 helpers ---*/
8868 /*------------------------------------------------------------*/
8870 /* Indicates whether the op requires a rounding-mode argument. Note
8871 that this covers only vector floating point arithmetic ops, and
8872 omits the scalar ones that need rounding modes. Note also that
8873 inconsistencies here will get picked up later by the IR sanity
8874 checker, so this isn't correctness-critical. */
8875 static Bool requiresRMode ( IROp op )
8877 switch (op) {
8878 /* 128 bit ops */
8879 case Iop_Add32Fx4: case Iop_Sub32Fx4:
8880 case Iop_Mul32Fx4: case Iop_Div32Fx4:
8881 case Iop_Add64Fx2: case Iop_Sub64Fx2:
8882 case Iop_Mul64Fx2: case Iop_Div64Fx2:
8883 /* 256 bit ops */
8884 case Iop_Add32Fx8: case Iop_Sub32Fx8:
8885 case Iop_Mul32Fx8: case Iop_Div32Fx8:
8886 case Iop_Add64Fx4: case Iop_Sub64Fx4:
8887 case Iop_Mul64Fx4: case Iop_Div64Fx4:
8888 return True;
8889 default:
8890 break;
8892 return False;
8896 /* Worker function; do not call directly.
8897 Handles full width G = G `op` E and G = (not G) `op` E.
8900 static ULong dis_SSE_E_to_G_all_wrk (
8901 const VexAbiInfo* vbi,
8902 Prefix pfx, Long delta,
8903 const HChar* opname, IROp op,
8904 Bool invertG
8907 HChar dis_buf[50];
8908 Int alen;
8909 IRTemp addr;
8910 UChar rm = getUChar(delta);
8911 Bool needsRMode = requiresRMode(op);
8912 IRExpr* gpart
8913 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm)))
8914 : getXMMReg(gregOfRexRM(pfx,rm));
8915 if (epartIsReg(rm)) {
8916 putXMMReg(
8917 gregOfRexRM(pfx,rm),
8918 needsRMode
8919 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
8920 gpart,
8921 getXMMReg(eregOfRexRM(pfx,rm)))
8922 : binop(op, gpart,
8923 getXMMReg(eregOfRexRM(pfx,rm)))
8925 DIP("%s %s,%s\n", opname,
8926 nameXMMReg(eregOfRexRM(pfx,rm)),
8927 nameXMMReg(gregOfRexRM(pfx,rm)) );
8928 return delta+1;
8929 } else {
8930 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
8931 putXMMReg(
8932 gregOfRexRM(pfx,rm),
8933 needsRMode
8934 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
8935 gpart,
8936 loadLE(Ity_V128, mkexpr(addr)))
8937 : binop(op, gpart,
8938 loadLE(Ity_V128, mkexpr(addr)))
8940 DIP("%s %s,%s\n", opname,
8941 dis_buf,
8942 nameXMMReg(gregOfRexRM(pfx,rm)) );
8943 return delta+alen;
8948 /* All lanes SSE binary operation, G = G `op` E. */
8950 static
8951 ULong dis_SSE_E_to_G_all ( const VexAbiInfo* vbi,
8952 Prefix pfx, Long delta,
8953 const HChar* opname, IROp op )
8955 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False );
8958 /* All lanes SSE binary operation, G = (not G) `op` E. */
8960 static
8961 ULong dis_SSE_E_to_G_all_invG ( const VexAbiInfo* vbi,
8962 Prefix pfx, Long delta,
8963 const HChar* opname, IROp op )
8965 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True );
8969 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
8971 static ULong dis_SSE_E_to_G_lo32 ( const VexAbiInfo* vbi,
8972 Prefix pfx, Long delta,
8973 const HChar* opname, IROp op )
8975 HChar dis_buf[50];
8976 Int alen;
8977 IRTemp addr;
8978 UChar rm = getUChar(delta);
8979 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
8980 if (epartIsReg(rm)) {
8981 putXMMReg( gregOfRexRM(pfx,rm),
8982 binop(op, gpart,
8983 getXMMReg(eregOfRexRM(pfx,rm))) );
8984 DIP("%s %s,%s\n", opname,
8985 nameXMMReg(eregOfRexRM(pfx,rm)),
8986 nameXMMReg(gregOfRexRM(pfx,rm)) );
8987 return delta+1;
8988 } else {
8989 /* We can only do a 32-bit memory read, so the upper 3/4 of the
8990 E operand needs to be made simply of zeroes. */
8991 IRTemp epart = newTemp(Ity_V128);
8992 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
8993 assign( epart, unop( Iop_32UtoV128,
8994 loadLE(Ity_I32, mkexpr(addr))) );
8995 putXMMReg( gregOfRexRM(pfx,rm),
8996 binop(op, gpart, mkexpr(epart)) );
8997 DIP("%s %s,%s\n", opname,
8998 dis_buf,
8999 nameXMMReg(gregOfRexRM(pfx,rm)) );
9000 return delta+alen;
9005 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
9007 static ULong dis_SSE_E_to_G_lo64 ( const VexAbiInfo* vbi,
9008 Prefix pfx, Long delta,
9009 const HChar* opname, IROp op )
9011 HChar dis_buf[50];
9012 Int alen;
9013 IRTemp addr;
9014 UChar rm = getUChar(delta);
9015 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
9016 if (epartIsReg(rm)) {
9017 putXMMReg( gregOfRexRM(pfx,rm),
9018 binop(op, gpart,
9019 getXMMReg(eregOfRexRM(pfx,rm))) );
9020 DIP("%s %s,%s\n", opname,
9021 nameXMMReg(eregOfRexRM(pfx,rm)),
9022 nameXMMReg(gregOfRexRM(pfx,rm)) );
9023 return delta+1;
9024 } else {
9025 /* We can only do a 64-bit memory read, so the upper half of the
9026 E operand needs to be made simply of zeroes. */
9027 IRTemp epart = newTemp(Ity_V128);
9028 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9029 assign( epart, unop( Iop_64UtoV128,
9030 loadLE(Ity_I64, mkexpr(addr))) );
9031 putXMMReg( gregOfRexRM(pfx,rm),
9032 binop(op, gpart, mkexpr(epart)) );
9033 DIP("%s %s,%s\n", opname,
9034 dis_buf,
9035 nameXMMReg(gregOfRexRM(pfx,rm)) );
9036 return delta+alen;
9041 /* All lanes unary SSE operation, G = op(E). */
9043 static ULong dis_SSE_E_to_G_unary_all (
9044 const VexAbiInfo* vbi,
9045 Prefix pfx, Long delta,
9046 const HChar* opname, IROp op
9049 HChar dis_buf[50];
9050 Int alen;
9051 IRTemp addr;
9052 UChar rm = getUChar(delta);
9053 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
9054 // up in the usual way.
9055 Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
9056 if (epartIsReg(rm)) {
9057 IRExpr* src = getXMMReg(eregOfRexRM(pfx,rm));
9058 /* XXXROUNDINGFIXME */
9059 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
9060 : unop(op, src);
9061 putXMMReg( gregOfRexRM(pfx,rm), res );
9062 DIP("%s %s,%s\n", opname,
9063 nameXMMReg(eregOfRexRM(pfx,rm)),
9064 nameXMMReg(gregOfRexRM(pfx,rm)) );
9065 return delta+1;
9066 } else {
9067 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9068 IRExpr* src = loadLE(Ity_V128, mkexpr(addr));
9069 /* XXXROUNDINGFIXME */
9070 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
9071 : unop(op, src);
9072 putXMMReg( gregOfRexRM(pfx,rm), res );
9073 DIP("%s %s,%s\n", opname,
9074 dis_buf,
9075 nameXMMReg(gregOfRexRM(pfx,rm)) );
9076 return delta+alen;
9081 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */
9083 static ULong dis_SSE_E_to_G_unary_lo32 (
9084 const VexAbiInfo* vbi,
9085 Prefix pfx, Long delta,
9086 const HChar* opname, IROp op
9089 /* First we need to get the old G value and patch the low 32 bits
9090 of the E operand into it. Then apply op and write back to G. */
9091 HChar dis_buf[50];
9092 Int alen;
9093 IRTemp addr;
9094 UChar rm = getUChar(delta);
9095 IRTemp oldG0 = newTemp(Ity_V128);
9096 IRTemp oldG1 = newTemp(Ity_V128);
9098 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
9100 if (epartIsReg(rm)) {
9101 assign( oldG1,
9102 binop( Iop_SetV128lo32,
9103 mkexpr(oldG0),
9104 getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) );
9105 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9106 DIP("%s %s,%s\n", opname,
9107 nameXMMReg(eregOfRexRM(pfx,rm)),
9108 nameXMMReg(gregOfRexRM(pfx,rm)) );
9109 return delta+1;
9110 } else {
9111 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9112 assign( oldG1,
9113 binop( Iop_SetV128lo32,
9114 mkexpr(oldG0),
9115 loadLE(Ity_I32, mkexpr(addr)) ));
9116 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9117 DIP("%s %s,%s\n", opname,
9118 dis_buf,
9119 nameXMMReg(gregOfRexRM(pfx,rm)) );
9120 return delta+alen;
9125 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */
9127 static ULong dis_SSE_E_to_G_unary_lo64 (
9128 const VexAbiInfo* vbi,
9129 Prefix pfx, Long delta,
9130 const HChar* opname, IROp op
9133 /* First we need to get the old G value and patch the low 64 bits
9134 of the E operand into it. Then apply op and write back to G. */
9135 HChar dis_buf[50];
9136 Int alen;
9137 IRTemp addr;
9138 UChar rm = getUChar(delta);
9139 IRTemp oldG0 = newTemp(Ity_V128);
9140 IRTemp oldG1 = newTemp(Ity_V128);
9142 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
9144 if (epartIsReg(rm)) {
9145 assign( oldG1,
9146 binop( Iop_SetV128lo64,
9147 mkexpr(oldG0),
9148 getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) );
9149 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9150 DIP("%s %s,%s\n", opname,
9151 nameXMMReg(eregOfRexRM(pfx,rm)),
9152 nameXMMReg(gregOfRexRM(pfx,rm)) );
9153 return delta+1;
9154 } else {
9155 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9156 assign( oldG1,
9157 binop( Iop_SetV128lo64,
9158 mkexpr(oldG0),
9159 loadLE(Ity_I64, mkexpr(addr)) ));
9160 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9161 DIP("%s %s,%s\n", opname,
9162 dis_buf,
9163 nameXMMReg(gregOfRexRM(pfx,rm)) );
9164 return delta+alen;
9169 /* SSE integer binary operation:
9170 G = G `op` E (eLeft == False)
9171 G = E `op` G (eLeft == True)
9173 static ULong dis_SSEint_E_to_G(
9174 const VexAbiInfo* vbi,
9175 Prefix pfx, Long delta,
9176 const HChar* opname, IROp op,
9177 Bool eLeft
9180 HChar dis_buf[50];
9181 Int alen;
9182 IRTemp addr;
9183 UChar rm = getUChar(delta);
9184 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
9185 IRExpr* epart = NULL;
9186 if (epartIsReg(rm)) {
9187 epart = getXMMReg(eregOfRexRM(pfx,rm));
9188 DIP("%s %s,%s\n", opname,
9189 nameXMMReg(eregOfRexRM(pfx,rm)),
9190 nameXMMReg(gregOfRexRM(pfx,rm)) );
9191 delta += 1;
9192 } else {
9193 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9194 epart = loadLE(Ity_V128, mkexpr(addr));
9195 DIP("%s %s,%s\n", opname,
9196 dis_buf,
9197 nameXMMReg(gregOfRexRM(pfx,rm)) );
9198 delta += alen;
9200 putXMMReg( gregOfRexRM(pfx,rm),
9201 eLeft ? binop(op, epart, gpart)
9202 : binop(op, gpart, epart) );
9203 return delta;
9207 /* Helper for doing SSE FP comparisons. False return ==> unhandled.
9208 This is all a bit of a kludge in that it ignores the subtleties of
9209 ordered-vs-unordered and signalling-vs-nonsignalling in the Intel
9210 spec. */
9211 static Bool findSSECmpOp ( /*OUT*/Bool* preSwapP,
9212 /*OUT*/IROp* opP,
9213 /*OUT*/Bool* postNotP,
9214 UInt imm8, Bool all_lanes, Int sz )
9216 if (imm8 >= 32) return False;
9218 /* First, compute a (preSwap, op, postNot) triple from
9219 the supplied imm8. */
9220 Bool pre = False;
9221 IROp op = Iop_INVALID;
9222 Bool not = False;
9224 # define XXX(_pre, _op, _not) { pre = _pre; op = _op; not = _not; }
9225 // If you add a case here, add a corresponding test for both VCMPSD_128
9226 // and VCMPSS_128 in avx-1.c.
9227 // Cases 0xA and above are
9228 // "Enhanced Comparison Predicate[s] for VEX-Encoded [insns]"
9229 switch (imm8) {
9230 // "O" = ordered, "U" = unordered
9231 // "Q" = non-signalling (quiet), "S" = signalling
9233 // swap operands?
9234 // |
9235 // | cmp op invert after?
9236 // | | |
9237 // v v v
9238 case 0x0: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_OQ
9239 case 0x8: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_UQ
9240 case 0x10: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_OS
9241 case 0x18: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_US
9243 case 0x1: XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OS
9244 case 0x11: XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OQ
9246 case 0x2: XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OS
9247 case 0x12: XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OQ
9249 case 0x3: XXX(False, Iop_CmpUN32Fx4, False); break; // UNORD_Q
9250 case 0x13: XXX(False, Iop_CmpUN32Fx4, False); break; // UNORD_S
9252 // 0xC: this isn't really right because it returns all-1s when
9253 // either operand is a NaN, and it should return all-0s.
9254 case 0x4: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_UQ
9255 case 0xC: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_OQ
9256 case 0x14: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_US
9257 case 0x1C: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_OS
9259 case 0x5: XXX(False, Iop_CmpLT32Fx4, True); break; // NLT_US
9260 case 0x15: XXX(False, Iop_CmpLT32Fx4, True); break; // NLT_UQ
9262 case 0x6: XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_US
9263 case 0x16: XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_UQ
9265 case 0x7: XXX(False, Iop_CmpUN32Fx4, True); break; // ORD_Q
9266 case 0x17: XXX(False, Iop_CmpUN32Fx4, True); break; // ORD_S
9268 case 0x9: XXX(True, Iop_CmpLE32Fx4, True); break; // NGE_US
9269 case 0x19: XXX(True, Iop_CmpLE32Fx4, True); break; // NGE_UQ
9271 case 0xA: XXX(True, Iop_CmpLT32Fx4, True); break; // NGT_US
9272 case 0x1A: XXX(True, Iop_CmpLT32Fx4, True); break; // NGT_UQ
9274 case 0xD: XXX(True, Iop_CmpLE32Fx4, False); break; // GE_OS
9275 case 0x1D: XXX(True, Iop_CmpLE32Fx4, False); break; // GE_OQ
9277 case 0xE: XXX(True, Iop_CmpLT32Fx4, False); break; // GT_OS
9278 case 0x1E: XXX(True, Iop_CmpLT32Fx4, False); break; // GT_OQ
9279 // Unhandled:
9280 // 0xB FALSE_OQ
9281 // 0xF TRUE_UQ
9282 // 0x1B FALSE_OS
9283 // 0x1F TRUE_US
9284 /* Don't forget to add test cases to VCMPSS_128_<imm8> in
9285 avx-1.c if new cases turn up. */
9286 default: break;
9288 # undef XXX
9289 if (op == Iop_INVALID) return False;
9291 /* Now convert the op into one with the same arithmetic but that is
9292 correct for the width and laneage requirements. */
9294 /**/ if (sz == 4 && all_lanes) {
9295 switch (op) {
9296 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32Fx4; break;
9297 case Iop_CmpLT32Fx4: op = Iop_CmpLT32Fx4; break;
9298 case Iop_CmpLE32Fx4: op = Iop_CmpLE32Fx4; break;
9299 case Iop_CmpUN32Fx4: op = Iop_CmpUN32Fx4; break;
9300 default: vassert(0);
9303 else if (sz == 4 && !all_lanes) {
9304 switch (op) {
9305 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32F0x4; break;
9306 case Iop_CmpLT32Fx4: op = Iop_CmpLT32F0x4; break;
9307 case Iop_CmpLE32Fx4: op = Iop_CmpLE32F0x4; break;
9308 case Iop_CmpUN32Fx4: op = Iop_CmpUN32F0x4; break;
9309 default: vassert(0);
9312 else if (sz == 8 && all_lanes) {
9313 switch (op) {
9314 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64Fx2; break;
9315 case Iop_CmpLT32Fx4: op = Iop_CmpLT64Fx2; break;
9316 case Iop_CmpLE32Fx4: op = Iop_CmpLE64Fx2; break;
9317 case Iop_CmpUN32Fx4: op = Iop_CmpUN64Fx2; break;
9318 default: vassert(0);
9321 else if (sz == 8 && !all_lanes) {
9322 switch (op) {
9323 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64F0x2; break;
9324 case Iop_CmpLT32Fx4: op = Iop_CmpLT64F0x2; break;
9325 case Iop_CmpLE32Fx4: op = Iop_CmpLE64F0x2; break;
9326 case Iop_CmpUN32Fx4: op = Iop_CmpUN64F0x2; break;
9327 default: vassert(0);
9330 else {
9331 vpanic("findSSECmpOp(amd64,guest)");
9334 *preSwapP = pre; *opP = op; *postNotP = not;
9335 return True;
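/* For example, imm8 = 0xD (GE_OS) yields (preSwap=True, Iop_CmpLE32Fx4,
   postNot=False): "x >= y" is evaluated as "y <= x" by swapping the
   operands, with no complement afterwards.  imm8 = 0x4 (NEQ_UQ)
   instead yields CmpEQ32Fx4 with postNot=True, so the EQ result is
   complemented.  The second switch then retargets the op to the
   requested width and laneage, e.g. CmpLE32Fx4 becomes CmpLE64F0x2
   for a scalar double-precision comparison. */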
9339 /* Handles SSE 32F/64F comparisons. It can fail, in which case it
9340 returns the original delta to indicate failure. */
9342 static Long dis_SSE_cmp_E_to_G ( const VexAbiInfo* vbi,
9343 Prefix pfx, Long delta,
9344 const HChar* opname, Bool all_lanes, Int sz )
9346 Long delta0 = delta;
9347 HChar dis_buf[50];
9348 Int alen;
9349 UInt imm8;
9350 IRTemp addr;
9351 Bool preSwap = False;
9352 IROp op = Iop_INVALID;
9353 Bool postNot = False;
9354 IRTemp plain = newTemp(Ity_V128);
9355 UChar rm = getUChar(delta);
9356 UShort mask = 0;
9357 vassert(sz == 4 || sz == 8);
9358 if (epartIsReg(rm)) {
9359 imm8 = getUChar(delta+1);
9360 if (imm8 >= 8) return delta0; /* FAIL */
9361 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
9362 if (!ok) return delta0; /* FAIL */
9363 vassert(!preSwap); /* never needed for imm8 < 8 */
9364 assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)),
9365 getXMMReg(eregOfRexRM(pfx,rm))) );
9366 delta += 2;
9367 DIP("%s $%u,%s,%s\n", opname,
9368 imm8,
9369 nameXMMReg(eregOfRexRM(pfx,rm)),
9370 nameXMMReg(gregOfRexRM(pfx,rm)) );
9371 } else {
9372 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
9373 imm8 = getUChar(delta+alen);
9374 if (imm8 >= 8) return delta0; /* FAIL */
9375 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
9376 if (!ok) return delta0; /* FAIL */
9377 vassert(!preSwap); /* never needed for imm8 < 8 */
9378 assign( plain,
9379 binop(
9381 getXMMReg(gregOfRexRM(pfx,rm)),
9382 all_lanes
9383 ? loadLE(Ity_V128, mkexpr(addr))
9384 : sz == 8
9385 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
9386 : /*sz==4*/
9387 unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
9390 delta += alen+1;
9391 DIP("%s $%u,%s,%s\n", opname,
9392 imm8,
9393 dis_buf,
9394 nameXMMReg(gregOfRexRM(pfx,rm)) );
9397 if (postNot && all_lanes) {
9398 putXMMReg( gregOfRexRM(pfx,rm),
9399 unop(Iop_NotV128, mkexpr(plain)) );
9401 else
9402 if (postNot && !all_lanes) {
9403 mask = toUShort(sz==4 ? 0x000F : 0x00FF);
9404 putXMMReg( gregOfRexRM(pfx,rm),
9405 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
9407 else {
9408 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) );
9411 return delta;
9415 /* Vector by scalar shift of G by the amount specified at the bottom
9416 of E. */
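/* If the (unmasked, full 64-bit) shift amount is >= the lane width,
   a logical shift produces all zeroes while an arithmetic shift acts
   as a shift by lanewidth-1, filling every lane with its sign bit;
   that is what the ITE in the function below expresses.  E.g. psraw
   by 70 turns each 16-bit lane into 0x0000 or 0xFFFF according to its
   sign, whereas psrlw by 70 gives all zeroes. */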
9418 static ULong dis_SSE_shiftG_byE ( const VexAbiInfo* vbi,
9419 Prefix pfx, Long delta,
9420 const HChar* opname, IROp op )
9422 HChar dis_buf[50];
9423 Int alen, size;
9424 IRTemp addr;
9425 Bool shl, shr, sar;
9426 UChar rm = getUChar(delta);
9427 IRTemp g0 = newTemp(Ity_V128);
9428 IRTemp g1 = newTemp(Ity_V128);
9429 IRTemp amt = newTemp(Ity_I64);
9430 IRTemp amt8 = newTemp(Ity_I8);
9431 if (epartIsReg(rm)) {
9432 assign( amt, getXMMRegLane64(eregOfRexRM(pfx,rm), 0) );
9433 DIP("%s %s,%s\n", opname,
9434 nameXMMReg(eregOfRexRM(pfx,rm)),
9435 nameXMMReg(gregOfRexRM(pfx,rm)) );
9436 delta++;
9437 } else {
9438 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9439 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
9440 DIP("%s %s,%s\n", opname,
9441 dis_buf,
9442 nameXMMReg(gregOfRexRM(pfx,rm)) );
9443 delta += alen;
9445 assign( g0, getXMMReg(gregOfRexRM(pfx,rm)) );
9446 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
9448 shl = shr = sar = False;
9449 size = 0;
9450 switch (op) {
9451 case Iop_ShlN16x8: shl = True; size = 16; break;
9452 case Iop_ShlN32x4: shl = True; size = 32; break;
9453 case Iop_ShlN64x2: shl = True; size = 64; break;
9454 case Iop_SarN16x8: sar = True; size = 16; break;
9455 case Iop_SarN32x4: sar = True; size = 32; break;
9456 case Iop_ShrN16x8: shr = True; size = 16; break;
9457 case Iop_ShrN32x4: shr = True; size = 32; break;
9458 case Iop_ShrN64x2: shr = True; size = 64; break;
9459 default: vassert(0);
9462 if (shl || shr) {
9463 assign(
9465 IRExpr_ITE(
9466 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
9467 binop(op, mkexpr(g0), mkexpr(amt8)),
9468 mkV128(0x0000)
9471 } else
9472 if (sar) {
9473 assign(
9475 IRExpr_ITE(
9476 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
9477 binop(op, mkexpr(g0), mkexpr(amt8)),
9478 binop(op, mkexpr(g0), mkU8(size-1))
9481 } else {
9482 vassert(0);
9485 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) );
9486 return delta;
9490 /* Vector by scalar shift of E by an immediate byte. */
9492 static
9493 ULong dis_SSE_shiftE_imm ( Prefix pfx,
9494 Long delta, const HChar* opname, IROp op )
9496 Bool shl, shr, sar;
9497 UChar rm = getUChar(delta);
9498 IRTemp e0 = newTemp(Ity_V128);
9499 IRTemp e1 = newTemp(Ity_V128);
9500 UChar amt, size;
9501 vassert(epartIsReg(rm));
9502 vassert(gregLO3ofRM(rm) == 2
9503 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
9504 amt = getUChar(delta+1);
9505 delta += 2;
9506 DIP("%s $%d,%s\n", opname,
9507 (Int)amt,
9508 nameXMMReg(eregOfRexRM(pfx,rm)) );
9509 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
9511 shl = shr = sar = False;
9512 size = 0;
9513 switch (op) {
9514 case Iop_ShlN16x8: shl = True; size = 16; break;
9515 case Iop_ShlN32x4: shl = True; size = 32; break;
9516 case Iop_ShlN64x2: shl = True; size = 64; break;
9517 case Iop_SarN16x8: sar = True; size = 16; break;
9518 case Iop_SarN32x4: sar = True; size = 32; break;
9519 case Iop_ShrN16x8: shr = True; size = 16; break;
9520 case Iop_ShrN32x4: shr = True; size = 32; break;
9521 case Iop_ShrN64x2: shr = True; size = 64; break;
9522 default: vassert(0);
9525 if (shl || shr) {
9526 assign( e1, amt >= size
9527 ? mkV128(0x0000)
9528 : binop(op, mkexpr(e0), mkU8(amt))
9530 } else
9531 if (sar) {
9532 assign( e1, amt >= size
9533 ? binop(op, mkexpr(e0), mkU8(size-1))
9534 : binop(op, mkexpr(e0), mkU8(amt))
9536 } else {
9537 vassert(0);
9540 putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) );
9541 return delta;
9545 /* Get the current SSE rounding mode. */
9547 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
9549 return
9550 unop( Iop_64to32,
9551 binop( Iop_And64,
9552 IRExpr_Get( OFFB_SSEROUND, Ity_I64 ),
9553 mkU64(3) ));
9556 static void put_sse_roundingmode ( IRExpr* sseround )
9558 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
9559 stmt( IRStmt_Put( OFFB_SSEROUND,
9560 unop(Iop_32Uto64,sseround) ) );
9563 /* Break a V128-bit value up into four 32-bit ints. */
9565 static void breakupV128to32s ( IRTemp t128,
9566 /*OUTs*/
9567 IRTemp* t3, IRTemp* t2,
9568 IRTemp* t1, IRTemp* t0 )
9570 IRTemp hi64 = newTemp(Ity_I64);
9571 IRTemp lo64 = newTemp(Ity_I64);
9572 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
9573 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
9575 vassert(t0 && *t0 == IRTemp_INVALID);
9576 vassert(t1 && *t1 == IRTemp_INVALID);
9577 vassert(t2 && *t2 == IRTemp_INVALID);
9578 vassert(t3 && *t3 == IRTemp_INVALID);
9580 *t0 = newTemp(Ity_I32);
9581 *t1 = newTemp(Ity_I32);
9582 *t2 = newTemp(Ity_I32);
9583 *t3 = newTemp(Ity_I32);
9584 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
9585 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
9586 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
9587 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
9590 /* Construct a V128-bit value from four 32-bit ints. */
9592 static IRExpr* mkV128from32s ( IRTemp t3, IRTemp t2,
9593 IRTemp t1, IRTemp t0 )
9595 return
9596 binop( Iop_64HLtoV128,
9597 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
9598 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
9602 /* Break a 64-bit value up into four 16-bit ints. */
9604 static void breakup64to16s ( IRTemp t64,
9605 /*OUTs*/
9606 IRTemp* t3, IRTemp* t2,
9607 IRTemp* t1, IRTemp* t0 )
9609 IRTemp hi32 = newTemp(Ity_I32);
9610 IRTemp lo32 = newTemp(Ity_I32);
9611 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
9612 assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
9614 vassert(t0 && *t0 == IRTemp_INVALID);
9615 vassert(t1 && *t1 == IRTemp_INVALID);
9616 vassert(t2 && *t2 == IRTemp_INVALID);
9617 vassert(t3 && *t3 == IRTemp_INVALID);
9619 *t0 = newTemp(Ity_I16);
9620 *t1 = newTemp(Ity_I16);
9621 *t2 = newTemp(Ity_I16);
9622 *t3 = newTemp(Ity_I16);
9623 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) );
9624 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
9625 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) );
9626 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
9629 /* Construct a 64-bit value from four 16-bit ints. */
9631 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
9632 IRTemp t1, IRTemp t0 )
9634 return
9635 binop( Iop_32HLto64,
9636 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
9637 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
9641 /* Break a V256-bit value up into four 64-bit ints. */
9643 static void breakupV256to64s ( IRTemp t256,
9644 /*OUTs*/
9645 IRTemp* t3, IRTemp* t2,
9646 IRTemp* t1, IRTemp* t0 )
9648 vassert(t0 && *t0 == IRTemp_INVALID);
9649 vassert(t1 && *t1 == IRTemp_INVALID);
9650 vassert(t2 && *t2 == IRTemp_INVALID);
9651 vassert(t3 && *t3 == IRTemp_INVALID);
9652 *t0 = newTemp(Ity_I64);
9653 *t1 = newTemp(Ity_I64);
9654 *t2 = newTemp(Ity_I64);
9655 *t3 = newTemp(Ity_I64);
9656 assign( *t0, unop(Iop_V256to64_0, mkexpr(t256)) );
9657 assign( *t1, unop(Iop_V256to64_1, mkexpr(t256)) );
9658 assign( *t2, unop(Iop_V256to64_2, mkexpr(t256)) );
9659 assign( *t3, unop(Iop_V256to64_3, mkexpr(t256)) );
9662 /* Break a V256-bit value up into two V128s. */
9664 static void breakupV256toV128s ( IRTemp t256,
9665 /*OUTs*/
9666 IRTemp* t1, IRTemp* t0 )
9668 vassert(t0 && *t0 == IRTemp_INVALID);
9669 vassert(t1 && *t1 == IRTemp_INVALID);
9670 *t0 = newTemp(Ity_V128);
9671 *t1 = newTemp(Ity_V128);
9672 assign(*t1, unop(Iop_V256toV128_1, mkexpr(t256)));
9673 assign(*t0, unop(Iop_V256toV128_0, mkexpr(t256)));
9676 /* Break a V256-bit value up into eight 32-bit ints. */
9678 static void breakupV256to32s ( IRTemp t256,
9679 /*OUTs*/
9680 IRTemp* t7, IRTemp* t6,
9681 IRTemp* t5, IRTemp* t4,
9682 IRTemp* t3, IRTemp* t2,
9683 IRTemp* t1, IRTemp* t0 )
9685 IRTemp t128_1 = IRTemp_INVALID;
9686 IRTemp t128_0 = IRTemp_INVALID;
9687 breakupV256toV128s( t256, &t128_1, &t128_0 );
9688 breakupV128to32s( t128_1, t7, t6, t5, t4 );
9689 breakupV128to32s( t128_0, t3, t2, t1, t0 );
9692 /* Break a V128-bit value up into two 64-bit ints. */
9694 static void breakupV128to64s ( IRTemp t128,
9695 /*OUTs*/
9696 IRTemp* t1, IRTemp* t0 )
9698 vassert(t0 && *t0 == IRTemp_INVALID);
9699 vassert(t1 && *t1 == IRTemp_INVALID);
9700 *t0 = newTemp(Ity_I64);
9701 *t1 = newTemp(Ity_I64);
9702 assign( *t0, unop(Iop_V128to64, mkexpr(t128)) );
9703 assign( *t1, unop(Iop_V128HIto64, mkexpr(t128)) );
9706 /* Construct a V256-bit value from eight 32-bit ints. */
9708 static IRExpr* mkV256from32s ( IRTemp t7, IRTemp t6,
9709 IRTemp t5, IRTemp t4,
9710 IRTemp t3, IRTemp t2,
9711 IRTemp t1, IRTemp t0 )
9713 return
9714 binop( Iop_V128HLtoV256,
9715 binop( Iop_64HLtoV128,
9716 binop(Iop_32HLto64, mkexpr(t7), mkexpr(t6)),
9717 binop(Iop_32HLto64, mkexpr(t5), mkexpr(t4)) ),
9718 binop( Iop_64HLtoV128,
9719 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
9720 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) )
9724 /* Construct a V256-bit value from four 64-bit ints. */
9726 static IRExpr* mkV256from64s ( IRTemp t3, IRTemp t2,
9727 IRTemp t1, IRTemp t0 )
9729 return
9730 binop( Iop_V128HLtoV256,
9731 binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)),
9732 binop(Iop_64HLtoV128, mkexpr(t1), mkexpr(t0))
9736 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
9737 values (aa,bb), computes, for each of the 4 16-bit lanes:
9739 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
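      E.g. with aa_lane = bb_lane = 0x4000 (0.5 in Q15 fixed point) the
      product is 0x10000000; >>u 14 gives 0x4000, +1 gives 0x4001, and
      the final >>u 1 gives 0x2000, i.e. 0.25 in Q15 -- a rounded
      high-half multiply.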
9741 static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
9743 IRTemp aa = newTemp(Ity_I64);
9744 IRTemp bb = newTemp(Ity_I64);
9745 IRTemp aahi32s = newTemp(Ity_I64);
9746 IRTemp aalo32s = newTemp(Ity_I64);
9747 IRTemp bbhi32s = newTemp(Ity_I64);
9748 IRTemp bblo32s = newTemp(Ity_I64);
9749 IRTemp rHi = newTemp(Ity_I64);
9750 IRTemp rLo = newTemp(Ity_I64);
9751 IRTemp one32x2 = newTemp(Ity_I64);
9752 assign(aa, aax);
9753 assign(bb, bbx);
9754 assign( aahi32s,
9755 binop(Iop_SarN32x2,
9756 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
9757 mkU8(16) ));
9758 assign( aalo32s,
9759 binop(Iop_SarN32x2,
9760 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
9761 mkU8(16) ));
9762 assign( bbhi32s,
9763 binop(Iop_SarN32x2,
9764 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
9765 mkU8(16) ));
9766 assign( bblo32s,
9767 binop(Iop_SarN32x2,
9768 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
9769 mkU8(16) ));
9770 assign(one32x2, mkU64( (1ULL << 32) + 1 ));
9771 assign(
9772 rHi,
9773 binop(
9774 Iop_ShrN32x2,
9775 binop(
9776 Iop_Add32x2,
9777 binop(
9778 Iop_ShrN32x2,
9779 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
9780 mkU8(14)
9782 mkexpr(one32x2)
9784 mkU8(1)
9787 assign(
9788 rLo,
9789 binop(
9790 Iop_ShrN32x2,
9791 binop(
9792 Iop_Add32x2,
9793 binop(
9794 Iop_ShrN32x2,
9795 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
9796 mkU8(14)
9798 mkexpr(one32x2)
9800 mkU8(1)
9803 return
9804 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
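/* Worked example of the rounding formula above (illustrative only):
   take one 16-bit lane with aa_lane = 0x4000 (+0.5 in Q15) and
   bb_lane = 0x2000 (+0.25 in Q15).  Then

      aa_lane *s32 bb_lane  =  0x08000000
      >>u 14                =  0x2000
      + 1                   =  0x2001
      >>u 1                 =  0x1000      (+0.125 in Q15)

   i.e. the full product rounded to nearest, with the half-ulp bias
   added before the final shift. */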
9807 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
9808 values (aa,bb), computes, for each lane:
9810 if aa_lane < 0 then - bb_lane
9811 else if aa_lane > 0 then bb_lane
9812 else 0
9814 static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
9816 IRTemp aa = newTemp(Ity_I64);
9817 IRTemp bb = newTemp(Ity_I64);
9818 IRTemp zero = newTemp(Ity_I64);
9819 IRTemp bbNeg = newTemp(Ity_I64);
9820 IRTemp negMask = newTemp(Ity_I64);
9821 IRTemp posMask = newTemp(Ity_I64);
9822 IROp opSub = Iop_INVALID;
9823 IROp opCmpGTS = Iop_INVALID;
9825 switch (laneszB) {
9826 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break;
9827 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
9828 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
9829 default: vassert(0);
9832 assign( aa, aax );
9833 assign( bb, bbx );
9834 assign( zero, mkU64(0) );
9835 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) );
9836 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
9837 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) );
9839 return
9840 binop(Iop_Or64,
9841 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)),
9842 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
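/* Illustrative sketch (not part of the translator; the function name is
   made up): the same selection written on plain C values, for one
   signed lane:

      int psign_lane ( int a, int b )
      {
         if (a < 0)  return -b;
         if (a > 0)  return  b;
         return 0;
      }

   The IR above gets the same effect branch-free, by turning the two
   comparisons into all-ones/all-zeroes masks and OR-ing together the
   two masked alternatives. */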
9847 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
9848 value aa, computes, for each lane
9850 if aa < 0 then -aa else aa
9852 Note that the result is interpreted as unsigned, so that the
9853 absolute value of the most negative signed input can be
9854 represented.
9856 static IRTemp math_PABS_MMX ( IRTemp aa, Int laneszB )
9858 IRTemp res = newTemp(Ity_I64);
9859 IRTemp zero = newTemp(Ity_I64);
9860 IRTemp aaNeg = newTemp(Ity_I64);
9861 IRTemp negMask = newTemp(Ity_I64);
9862 IRTemp posMask = newTemp(Ity_I64);
9863 IROp opSub = Iop_INVALID;
9864 IROp opSarN = Iop_INVALID;
9866 switch (laneszB) {
9867 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break;
9868 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
9869 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
9870 default: vassert(0);
9873 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
9874 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
9875 assign( zero, mkU64(0) );
9876 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) );
9877 assign( res,
9878 binop(Iop_Or64,
9879 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)),
9880 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ));
9881 return res;
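/* The mask trick used above, shown on a plain C value (illustrative
   only, assuming an arithmetic right shift): for one 32-bit lane,

      int32_t  m  = x >> 31;                  // all ones if x < 0, else 0
      uint32_t ab = (x & ~m) | ((-x) & m);    // |x|, viewed as unsigned

   which is what the opSarN / Not64 / Sub / And / Or sequence computes
   lane-wise, and which also yields the right (unsigned) answer for the
   most negative input. */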
9884 /* XMM version of math_PABS_MMX. */
9885 static IRTemp math_PABS_XMM ( IRTemp aa, Int laneszB )
9887 IRTemp res = newTemp(Ity_V128);
9888 IRTemp aaHi = newTemp(Ity_I64);
9889 IRTemp aaLo = newTemp(Ity_I64);
9890 assign(aaHi, unop(Iop_V128HIto64, mkexpr(aa)));
9891 assign(aaLo, unop(Iop_V128to64, mkexpr(aa)));
9892 assign(res, binop(Iop_64HLtoV128,
9893 mkexpr(math_PABS_MMX(aaHi, laneszB)),
9894 mkexpr(math_PABS_MMX(aaLo, laneszB))));
9895 return res;
9898 /* Specialisations of math_PABS_XMM, since there's no easy way to do
9899 partial applications in C :-( */
9900 static IRTemp math_PABS_XMM_pap4 ( IRTemp aa ) {
9901 return math_PABS_XMM(aa, 4);
9904 static IRTemp math_PABS_XMM_pap2 ( IRTemp aa ) {
9905 return math_PABS_XMM(aa, 2);
9908 static IRTemp math_PABS_XMM_pap1 ( IRTemp aa ) {
9909 return math_PABS_XMM(aa, 1);
9912 /* YMM version of math_PABS_XMM. */
9913 static IRTemp math_PABS_YMM ( IRTemp aa, Int laneszB )
9915 IRTemp res = newTemp(Ity_V256);
9916 IRTemp aaHi = IRTemp_INVALID;
9917 IRTemp aaLo = IRTemp_INVALID;
9918 breakupV256toV128s(aa, &aaHi, &aaLo);
9919 assign(res, binop(Iop_V128HLtoV256,
9920 mkexpr(math_PABS_XMM(aaHi, laneszB)),
9921 mkexpr(math_PABS_XMM(aaLo, laneszB))));
9922 return res;
9925 static IRTemp math_PABS_YMM_pap4 ( IRTemp aa ) {
9926 return math_PABS_YMM(aa, 4);
9929 static IRTemp math_PABS_YMM_pap2 ( IRTemp aa ) {
9930 return math_PABS_YMM(aa, 2);
9933 static IRTemp math_PABS_YMM_pap1 ( IRTemp aa ) {
9934 return math_PABS_YMM(aa, 1);
9937 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
9938 IRTemp lo64, Long byteShift )
9940 vassert(byteShift >= 1 && byteShift <= 7);
9941 return
9942 binop(Iop_Or64,
9943 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
9944 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
9948 static IRTemp math_PALIGNR_XMM ( IRTemp sV, IRTemp dV, UInt imm8 )
9950 IRTemp res = newTemp(Ity_V128);
9951 IRTemp sHi = newTemp(Ity_I64);
9952 IRTemp sLo = newTemp(Ity_I64);
9953 IRTemp dHi = newTemp(Ity_I64);
9954 IRTemp dLo = newTemp(Ity_I64);
9955 IRTemp rHi = newTemp(Ity_I64);
9956 IRTemp rLo = newTemp(Ity_I64);
9958 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
9959 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
9960 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
9961 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
9963 if (imm8 == 0) {
9964 assign( rHi, mkexpr(sHi) );
9965 assign( rLo, mkexpr(sLo) );
9967 else if (imm8 >= 1 && imm8 <= 7) {
9968 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, imm8) );
9969 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, imm8) );
9971 else if (imm8 == 8) {
9972 assign( rHi, mkexpr(dLo) );
9973 assign( rLo, mkexpr(sHi) );
9975 else if (imm8 >= 9 && imm8 <= 15) {
9976 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-8) );
9977 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, imm8-8) );
9979 else if (imm8 == 16) {
9980 assign( rHi, mkexpr(dHi) );
9981 assign( rLo, mkexpr(dLo) );
9983 else if (imm8 >= 17 && imm8 <= 23) {
9984 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-16))) );
9985 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-16) );
9987 else if (imm8 == 24) {
9988 assign( rHi, mkU64(0) );
9989 assign( rLo, mkexpr(dHi) );
9991 else if (imm8 >= 25 && imm8 <= 31) {
9992 assign( rHi, mkU64(0) );
9993 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-24))) );
9995 else if (imm8 >= 32 && imm8 <= 255) {
9996 assign( rHi, mkU64(0) );
9997 assign( rLo, mkU64(0) );
9999 else
10000 vassert(0);
10002 assign( res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
10003 return res;
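/* Worked example (illustrative only): PALIGNR treats dV:sV as a single
   32-byte value, dV in the upper 16 bytes, and the result is the 16
   bytes starting at byte offset imm8.  E.g. for imm8 == 3:

      rLo = (sHi << 40) | (sLo >> 24)   -- bytes 3..10 of sV
      rHi = (dLo << 40) | (sHi >> 24)   -- bytes 11..15 of sV, 0..2 of dV

   which is exactly the Shl64/Shr64/Or64 combination built by
   dis_PALIGNR_XMM_helper, since 8*(8-3) == 40 and 8*3 == 24. */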
10007 /* Generate a SIGSEGV followed by a restart of the current instruction
10008    if effective_addr is not aligned to the requested boundary (16, 32
10009    or 64 bytes, as encoded in |mask|; see the wrappers below).  This is required behaviour for some SSE3 instructions and all 128-bit SSSE3 instructions.
10010 This assumes that guest_RIP_curr_instr is set correctly! */
10011 static
10012 void gen_SEGV_if_not_XX_aligned ( IRTemp effective_addr, ULong mask )
10014 stmt(
10015 IRStmt_Exit(
10016 binop(Iop_CmpNE64,
10017 binop(Iop_And64,mkexpr(effective_addr),mkU64(mask)),
10018 mkU64(0)),
10019 Ijk_SigSEGV,
10020 IRConst_U64(guest_RIP_curr_instr),
10021 OFFB_RIP
10026 static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) {
10027 gen_SEGV_if_not_XX_aligned(effective_addr, 16-1);
10030 static void gen_SEGV_if_not_32_aligned ( IRTemp effective_addr ) {
10031 gen_SEGV_if_not_XX_aligned(effective_addr, 32-1);
10034 static void gen_SEGV_if_not_64_aligned ( IRTemp effective_addr ) {
10035 gen_SEGV_if_not_XX_aligned(effective_addr, 64-1);
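/* The check itself is just "addr & (N-1) != 0" for N a power of two.
   Illustrative example (not part of the translator):

      addr = 0x1008, N = 16  ==>  0x1008 & 0xF = 0x8 != 0  ==>  SIGSEGV
      addr = 0x1010, N = 16  ==>  0x1010 & 0xF = 0x0       ==>  OK      */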
10038 /* Helper for deciding whether a given insn (starting at the opcode
10039 byte) may validly be used with a LOCK prefix. The following insns
10040 may be used with LOCK when their destination operand is in memory.
10041 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
10043 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
10044 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
10045 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
10046      SBB    80 /3, 81 /3, 82 /x, 83 /3, 18, 19
10047 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
10048 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
10049 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
10051 DEC FE /1, FF /1
10052 INC FE /0, FF /0
10054 NEG F6 /3, F7 /3
10055 NOT F6 /2, F7 /2
10057 XCHG 86, 87
10059 BTC 0F BB, 0F BA /7
10060 BTR 0F B3, 0F BA /6
10061 BTS 0F AB, 0F BA /5
10063 CMPXCHG 0F B0, 0F B1
10064 CMPXCHG8B 0F C7 /1
10066 XADD 0F C0, 0F C1
10068 ------------------------------
10070 80 /0 = addb $imm8, rm8
10071 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
10072 82 /0 = addb $imm8, rm8
10073 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
10075 00 = addb r8, rm8
10076 01 = addl r32, rm32 and addw r16, rm16
10078 Same for ADD OR ADC SBB AND SUB XOR
10080 FE /1 = dec rm8
10081 FF /1 = dec rm32 and dec rm16
10083 FE /0 = inc rm8
10084 FF /0 = inc rm32 and inc rm16
10086 F6 /3 = neg rm8
10087 F7 /3 = neg rm32 and neg rm16
10089 F6 /2 = not rm8
10090 F7 /2 = not rm32 and not rm16
10092 0F BB = btcw r16, rm16 and btcl r32, rm32
10093      0F BA /7 = btcw $imm8, rm16  and  btcl $imm8, rm32
10095 Same for BTS, BTR
10097 static Bool can_be_used_with_LOCK_prefix ( const UChar* opc )
10099 switch (opc[0]) {
10100 case 0x00: case 0x01: case 0x08: case 0x09:
10101 case 0x10: case 0x11: case 0x18: case 0x19:
10102 case 0x20: case 0x21: case 0x28: case 0x29:
10103 case 0x30: case 0x31:
10104 if (!epartIsReg(opc[1]))
10105 return True;
10106 break;
10108 case 0x80: case 0x81: case 0x82: case 0x83:
10109 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
10110 && !epartIsReg(opc[1]))
10111 return True;
10112 break;
10114 case 0xFE: case 0xFF:
10115 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
10116 && !epartIsReg(opc[1]))
10117 return True;
10118 break;
10120 case 0xF6: case 0xF7:
10121 if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
10122 && !epartIsReg(opc[1]))
10123 return True;
10124 break;
10126 case 0x86: case 0x87:
10127 if (!epartIsReg(opc[1]))
10128 return True;
10129 break;
10131 case 0x0F: {
10132 switch (opc[1]) {
10133 case 0xBB: case 0xB3: case 0xAB:
10134 if (!epartIsReg(opc[2]))
10135 return True;
10136 break;
10137 case 0xBA:
10138 if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
10139 && !epartIsReg(opc[2]))
10140 return True;
10141 break;
10142 case 0xB0: case 0xB1:
10143 if (!epartIsReg(opc[2]))
10144 return True;
10145 break;
10146 case 0xC7:
10147 if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
10148 return True;
10149 break;
10150 case 0xC0: case 0xC1:
10151 if (!epartIsReg(opc[2]))
10152 return True;
10153 break;
10154 default:
10155 break;
10156 } /* switch (opc[1]) */
10157 break;
10160 default:
10161 break;
10162 } /* switch (opc[0]) */
10164 return False;
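/* Usage sketch (illustrative only): for the instruction

      F0 01 03   ==   lock addl %eax,(%rbx)

   the caller passes |opc| pointing at the 0x01.  opc[1] == 0x03 has
   mod == 00 (a memory operand), so epartIsReg() is False and the
   predicate returns True.  The same opcode with a register destination
   (e.g. modrm == 0xC3, addl %eax,%ebx) would return False. */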
10168 /*------------------------------------------------------------*/
10169 /*--- ---*/
10170 /*--- Top-level SSE/SSE2: dis_ESC_0F__SSE2 ---*/
10171 /*--- ---*/
10172 /*------------------------------------------------------------*/
10174 static Long dis_COMISD ( const VexAbiInfo* vbi, Prefix pfx,
10175 Long delta, Bool isAvx, UChar opc )
10177 vassert(opc == 0x2F/*COMISD*/ || opc == 0x2E/*UCOMISD*/);
10178 Int alen = 0;
10179 HChar dis_buf[50];
10180 IRTemp argL = newTemp(Ity_F64);
10181 IRTemp argR = newTemp(Ity_F64);
10182 UChar modrm = getUChar(delta);
10183 IRTemp addr = IRTemp_INVALID;
10184 if (epartIsReg(modrm)) {
10185 assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm),
10186 0/*lowest lane*/ ) );
10187 delta += 1;
10188 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
10189 opc==0x2E ? "u" : "",
10190 nameXMMReg(eregOfRexRM(pfx,modrm)),
10191 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10192 } else {
10193 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10194 assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
10195 delta += alen;
10196 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
10197 opc==0x2E ? "u" : "",
10198 dis_buf,
10199 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10201 assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm),
10202 0/*lowest lane*/ ) );
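   /* Note (editorial): Iop_CmpF64 yields an IRCmpF64Result, whose
      encoding mirrors the x86 flag layout (for instance "unordered"
      is 0x45).  Masking with 0x45 below keeps only bit 0 (CF), bit 2
      (PF) and bit 6 (ZF), which are the only flags (U)COMISD defines;
      all the others are forced to zero. */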
10204 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
10205 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
10206 stmt( IRStmt_Put(
10207 OFFB_CC_DEP1,
10208 binop( Iop_And64,
10209 unop( Iop_32Uto64,
10210 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ),
10211 mkU64(0x45)
10212 )));
10213 return delta;
10217 static Long dis_COMISS ( const VexAbiInfo* vbi, Prefix pfx,
10218 Long delta, Bool isAvx, UChar opc )
10220 vassert(opc == 0x2F/*COMISS*/ || opc == 0x2E/*UCOMISS*/);
10221 Int alen = 0;
10222 HChar dis_buf[50];
10223 IRTemp argL = newTemp(Ity_F32);
10224 IRTemp argR = newTemp(Ity_F32);
10225 UChar modrm = getUChar(delta);
10226 IRTemp addr = IRTemp_INVALID;
10227 if (epartIsReg(modrm)) {
10228 assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm),
10229 0/*lowest lane*/ ) );
10230 delta += 1;
10231 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
10232 opc==0x2E ? "u" : "",
10233 nameXMMReg(eregOfRexRM(pfx,modrm)),
10234 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10235 } else {
10236 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10237 assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
10238 delta += alen;
10239 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
10240 opc==0x2E ? "u" : "",
10241 dis_buf,
10242 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10244 assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm),
10245 0/*lowest lane*/ ) );
10247 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
10248 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
10249 stmt( IRStmt_Put(
10250 OFFB_CC_DEP1,
10251 binop( Iop_And64,
10252 unop( Iop_32Uto64,
10253 binop(Iop_CmpF64,
10254 unop(Iop_F32toF64,mkexpr(argL)),
10255 unop(Iop_F32toF64,mkexpr(argR)))),
10256 mkU64(0x45)
10257 )));
10258 return delta;
10262 static Long dis_PSHUFD_32x4 ( const VexAbiInfo* vbi, Prefix pfx,
10263 Long delta, Bool writesYmm )
10265 Int order;
10266 Int alen = 0;
10267 HChar dis_buf[50];
10268 IRTemp sV = newTemp(Ity_V128);
10269 UChar modrm = getUChar(delta);
10270 const HChar* strV = writesYmm ? "v" : "";
10271 IRTemp addr = IRTemp_INVALID;
10272 if (epartIsReg(modrm)) {
10273 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10274 order = (Int)getUChar(delta+1);
10275 delta += 1+1;
10276 DIP("%spshufd $%d,%s,%s\n", strV, order,
10277 nameXMMReg(eregOfRexRM(pfx,modrm)),
10278 nameXMMReg(gregOfRexRM(pfx,modrm)));
10279 } else {
10280 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
10281 1/*byte after the amode*/ );
10282 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10283 order = (Int)getUChar(delta+alen);
10284 delta += alen+1;
10285 DIP("%spshufd $%d,%s,%s\n", strV, order,
10286 dis_buf,
10287 nameXMMReg(gregOfRexRM(pfx,modrm)));
10290 IRTemp s3, s2, s1, s0;
10291 s3 = s2 = s1 = s0 = IRTemp_INVALID;
10292 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
10294 # define SEL(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
10295 IRTemp dV = newTemp(Ity_V128);
10296 assign(dV,
10297 mkV128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
10298 SEL((order>>2)&3), SEL((order>>0)&3) )
10300 # undef SEL
10302 (writesYmm ? putYMMRegLoAndZU : putXMMReg)
10303 (gregOfRexRM(pfx,modrm), mkexpr(dV));
10304 return delta;
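/* Worked example (illustrative only): order == 0x1B == binary 00 01 10 11
   selects, from most to least significant result lane,
   SEL(0), SEL(1), SEL(2), SEL(3), i.e. it reverses the four 32-bit
   lanes of the source ("pshufd $0x1b, %xmm1, %xmm0"). */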
10308 static Long dis_PSHUFD_32x8 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
10310 Int order;
10311 Int alen = 0;
10312 HChar dis_buf[50];
10313 IRTemp sV = newTemp(Ity_V256);
10314 UChar modrm = getUChar(delta);
10315 IRTemp addr = IRTemp_INVALID;
10316 UInt rG = gregOfRexRM(pfx,modrm);
10317 if (epartIsReg(modrm)) {
10318 UInt rE = eregOfRexRM(pfx,modrm);
10319 assign( sV, getYMMReg(rE) );
10320 order = (Int)getUChar(delta+1);
10321 delta += 1+1;
10322 DIP("vpshufd $%d,%s,%s\n", order, nameYMMReg(rE), nameYMMReg(rG));
10323 } else {
10324 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
10325 1/*byte after the amode*/ );
10326 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
10327 order = (Int)getUChar(delta+alen);
10328 delta += alen+1;
10329 DIP("vpshufd $%d,%s,%s\n", order, dis_buf, nameYMMReg(rG));
10332 IRTemp s[8];
10333 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
10334 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
10335 &s[3], &s[2], &s[1], &s[0] );
10337 putYMMReg( rG, mkV256from32s( s[4 + ((order>>6)&3)],
10338 s[4 + ((order>>4)&3)],
10339 s[4 + ((order>>2)&3)],
10340 s[4 + ((order>>0)&3)],
10341 s[0 + ((order>>6)&3)],
10342 s[0 + ((order>>4)&3)],
10343 s[0 + ((order>>2)&3)],
10344 s[0 + ((order>>0)&3)] ) );
10345 return delta;
10349 static IRTemp math_PSRLDQ ( IRTemp sV, Int imm )
10351 IRTemp dV = newTemp(Ity_V128);
10352 IRTemp hi64 = newTemp(Ity_I64);
10353 IRTemp lo64 = newTemp(Ity_I64);
10354 IRTemp hi64r = newTemp(Ity_I64);
10355 IRTemp lo64r = newTemp(Ity_I64);
10357 vassert(imm >= 0 && imm <= 255);
10358 if (imm >= 16) {
10359 assign(dV, mkV128(0x0000));
10360 return dV;
10363 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
10364 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
10366 if (imm == 0) {
10367 assign( lo64r, mkexpr(lo64) );
10368 assign( hi64r, mkexpr(hi64) );
10370 else
10371 if (imm == 8) {
10372 assign( hi64r, mkU64(0) );
10373 assign( lo64r, mkexpr(hi64) );
10375 else
10376 if (imm > 8) {
10377 assign( hi64r, mkU64(0) );
10378 assign( lo64r, binop( Iop_Shr64, mkexpr(hi64), mkU8( 8*(imm-8) ) ));
10379 } else {
10380 assign( hi64r, binop( Iop_Shr64, mkexpr(hi64), mkU8(8 * imm) ));
10381 assign( lo64r,
10382 binop( Iop_Or64,
10383 binop(Iop_Shr64, mkexpr(lo64),
10384 mkU8(8 * imm)),
10385 binop(Iop_Shl64, mkexpr(hi64),
10386 mkU8(8 * (8 - imm)) )
10391 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
10392 return dV;
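/* Worked example (illustrative only): for 0 < imm < 8 the low result
   half must pick up bytes shifted down out of the high half, e.g.
   imm == 3:

      hi64r = hi64 >> 24
      lo64r = (lo64 >> 24) | (hi64 << 40)

   which is the Shr/Shl/Or combination in the final "else" arm above. */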
10396 static IRTemp math_PSLLDQ ( IRTemp sV, Int imm )
10398 IRTemp dV = newTemp(Ity_V128);
10399 IRTemp hi64 = newTemp(Ity_I64);
10400 IRTemp lo64 = newTemp(Ity_I64);
10401 IRTemp hi64r = newTemp(Ity_I64);
10402 IRTemp lo64r = newTemp(Ity_I64);
10404 vassert(imm >= 0 && imm <= 255);
10405 if (imm >= 16) {
10406 assign(dV, mkV128(0x0000));
10407 return dV;
10410 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
10411 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
10413 if (imm == 0) {
10414 assign( lo64r, mkexpr(lo64) );
10415 assign( hi64r, mkexpr(hi64) );
10417 else
10418 if (imm == 8) {
10419 assign( lo64r, mkU64(0) );
10420 assign( hi64r, mkexpr(lo64) );
10422 else
10423 if (imm > 8) {
10424 assign( lo64r, mkU64(0) );
10425 assign( hi64r, binop( Iop_Shl64, mkexpr(lo64), mkU8( 8*(imm-8) ) ));
10426 } else {
10427 assign( lo64r, binop( Iop_Shl64, mkexpr(lo64), mkU8(8 * imm) ));
10428 assign( hi64r,
10429 binop( Iop_Or64,
10430 binop(Iop_Shl64, mkexpr(hi64),
10431 mkU8(8 * imm)),
10432 binop(Iop_Shr64, mkexpr(lo64),
10433 mkU8(8 * (8 - imm)) )
10438 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
10439 return dV;
10443 static Long dis_CVTxSD2SI ( const VexAbiInfo* vbi, Prefix pfx,
10444 Long delta, Bool isAvx, UChar opc, Int sz )
10446 vassert(opc == 0x2D/*CVTSD2SI*/ || opc == 0x2C/*CVTTSD2SI*/);
10447 HChar dis_buf[50];
10448 Int alen = 0;
10449 UChar modrm = getUChar(delta);
10450 IRTemp addr = IRTemp_INVALID;
10451 IRTemp rmode = newTemp(Ity_I32);
10452 IRTemp f64lo = newTemp(Ity_F64);
10453 Bool r2zero = toBool(opc == 0x2C);
10455 if (epartIsReg(modrm)) {
10456 delta += 1;
10457 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
10458 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10459 nameXMMReg(eregOfRexRM(pfx,modrm)),
10460 nameIReg(sz, gregOfRexRM(pfx,modrm),
10461 False));
10462 } else {
10463 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10464 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10465 delta += alen;
10466 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10467 dis_buf,
10468 nameIReg(sz, gregOfRexRM(pfx,modrm),
10469 False));
10472 if (r2zero) {
10473 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10474 } else {
10475 assign( rmode, get_sse_roundingmode() );
10478 if (sz == 4) {
10479 putIReg32( gregOfRexRM(pfx,modrm),
10480 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
10481 } else {
10482 vassert(sz == 8);
10483 putIReg64( gregOfRexRM(pfx,modrm),
10484 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) );
10487 return delta;
10491 static Long dis_CVTxSS2SI ( const VexAbiInfo* vbi, Prefix pfx,
10492 Long delta, Bool isAvx, UChar opc, Int sz )
10494 vassert(opc == 0x2D/*CVTSS2SI*/ || opc == 0x2C/*CVTTSS2SI*/);
10495 HChar dis_buf[50];
10496 Int alen = 0;
10497 UChar modrm = getUChar(delta);
10498 IRTemp addr = IRTemp_INVALID;
10499 IRTemp rmode = newTemp(Ity_I32);
10500 IRTemp f32lo = newTemp(Ity_F32);
10501 Bool r2zero = toBool(opc == 0x2C);
10503 if (epartIsReg(modrm)) {
10504 delta += 1;
10505 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
10506 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10507 nameXMMReg(eregOfRexRM(pfx,modrm)),
10508 nameIReg(sz, gregOfRexRM(pfx,modrm),
10509 False));
10510 } else {
10511 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10512 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
10513 delta += alen;
10514 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10515 dis_buf,
10516 nameIReg(sz, gregOfRexRM(pfx,modrm),
10517 False));
10520 if (r2zero) {
10521 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10522 } else {
10523 assign( rmode, get_sse_roundingmode() );
10526 if (sz == 4) {
10527 putIReg32( gregOfRexRM(pfx,modrm),
10528 binop( Iop_F64toI32S,
10529 mkexpr(rmode),
10530 unop(Iop_F32toF64, mkexpr(f32lo))) );
10531 } else {
10532 vassert(sz == 8);
10533 putIReg64( gregOfRexRM(pfx,modrm),
10534 binop( Iop_F64toI64S,
10535 mkexpr(rmode),
10536 unop(Iop_F32toF64, mkexpr(f32lo))) );
10539 return delta;
10543 static Long dis_CVTPS2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
10544 Long delta, Bool isAvx )
10546 IRTemp addr = IRTemp_INVALID;
10547 Int alen = 0;
10548 HChar dis_buf[50];
10549 IRTemp f32lo = newTemp(Ity_F32);
10550 IRTemp f32hi = newTemp(Ity_F32);
10551 UChar modrm = getUChar(delta);
10552 UInt rG = gregOfRexRM(pfx,modrm);
10553 if (epartIsReg(modrm)) {
10554 UInt rE = eregOfRexRM(pfx,modrm);
10555 assign( f32lo, getXMMRegLane32F(rE, 0) );
10556 assign( f32hi, getXMMRegLane32F(rE, 1) );
10557 delta += 1;
10558 DIP("%scvtps2pd %s,%s\n",
10559 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
10560 } else {
10561 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10562 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
10563 assign( f32hi, loadLE(Ity_F32,
10564 binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
10565 delta += alen;
10566 DIP("%scvtps2pd %s,%s\n",
10567 isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
10570 putXMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32hi)) );
10571 putXMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32lo)) );
10572 if (isAvx)
10573 putYMMRegLane128( rG, 1, mkV128(0));
10574 return delta;
10578 static Long dis_CVTPS2PD_256 ( const VexAbiInfo* vbi, Prefix pfx,
10579 Long delta )
10581 IRTemp addr = IRTemp_INVALID;
10582 Int alen = 0;
10583 HChar dis_buf[50];
10584 IRTemp f32_0 = newTemp(Ity_F32);
10585 IRTemp f32_1 = newTemp(Ity_F32);
10586 IRTemp f32_2 = newTemp(Ity_F32);
10587 IRTemp f32_3 = newTemp(Ity_F32);
10588 UChar modrm = getUChar(delta);
10589 UInt rG = gregOfRexRM(pfx,modrm);
10590 if (epartIsReg(modrm)) {
10591 UInt rE = eregOfRexRM(pfx,modrm);
10592 assign( f32_0, getXMMRegLane32F(rE, 0) );
10593 assign( f32_1, getXMMRegLane32F(rE, 1) );
10594 assign( f32_2, getXMMRegLane32F(rE, 2) );
10595 assign( f32_3, getXMMRegLane32F(rE, 3) );
10596 delta += 1;
10597 DIP("vcvtps2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
10598 } else {
10599 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10600 assign( f32_0, loadLE(Ity_F32, mkexpr(addr)) );
10601 assign( f32_1, loadLE(Ity_F32,
10602 binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
10603 assign( f32_2, loadLE(Ity_F32,
10604 binop(Iop_Add64,mkexpr(addr),mkU64(8))) );
10605 assign( f32_3, loadLE(Ity_F32,
10606 binop(Iop_Add64,mkexpr(addr),mkU64(12))) );
10607 delta += alen;
10608 DIP("vcvtps2pd %s,%s\n", dis_buf, nameYMMReg(rG));
10611 putYMMRegLane64F( rG, 3, unop(Iop_F32toF64, mkexpr(f32_3)) );
10612 putYMMRegLane64F( rG, 2, unop(Iop_F32toF64, mkexpr(f32_2)) );
10613 putYMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32_1)) );
10614 putYMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32_0)) );
10615 return delta;
10619 static Long dis_CVTPD2PS_128 ( const VexAbiInfo* vbi, Prefix pfx,
10620 Long delta, Bool isAvx )
10622 IRTemp addr = IRTemp_INVALID;
10623 Int alen = 0;
10624 HChar dis_buf[50];
10625 UChar modrm = getUChar(delta);
10626 UInt rG = gregOfRexRM(pfx,modrm);
10627 IRTemp argV = newTemp(Ity_V128);
10628 IRTemp rmode = newTemp(Ity_I32);
10629 if (epartIsReg(modrm)) {
10630 UInt rE = eregOfRexRM(pfx,modrm);
10631 assign( argV, getXMMReg(rE) );
10632 delta += 1;
10633 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
10634 nameXMMReg(rE), nameXMMReg(rG));
10635 } else {
10636 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10637 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10638 delta += alen;
10639 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
10640 dis_buf, nameXMMReg(rG) );
10643 assign( rmode, get_sse_roundingmode() );
10644 IRTemp t0 = newTemp(Ity_F64);
10645 IRTemp t1 = newTemp(Ity_F64);
10646 assign( t0, unop(Iop_ReinterpI64asF64,
10647 unop(Iop_V128to64, mkexpr(argV))) );
10648 assign( t1, unop(Iop_ReinterpI64asF64,
10649 unop(Iop_V128HIto64, mkexpr(argV))) );
10651 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), mkexpr(_t) )
10652 putXMMRegLane32( rG, 3, mkU32(0) );
10653 putXMMRegLane32( rG, 2, mkU32(0) );
10654 putXMMRegLane32F( rG, 1, CVT(t1) );
10655 putXMMRegLane32F( rG, 0, CVT(t0) );
10656 # undef CVT
10657 if (isAvx)
10658 putYMMRegLane128( rG, 1, mkV128(0) );
10660 return delta;
10664 static Long dis_CVTxPS2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
10665 Long delta, Bool isAvx, Bool r2zero )
10667 IRTemp addr = IRTemp_INVALID;
10668 Int alen = 0;
10669 HChar dis_buf[50];
10670 UChar modrm = getUChar(delta);
10671 IRTemp argV = newTemp(Ity_V128);
10672 IRTemp rmode = newTemp(Ity_I32);
10673 UInt rG = gregOfRexRM(pfx,modrm);
10674 IRTemp t0, t1, t2, t3;
10676 if (epartIsReg(modrm)) {
10677 UInt rE = eregOfRexRM(pfx,modrm);
10678 assign( argV, getXMMReg(rE) );
10679 delta += 1;
10680 DIP("%scvt%sps2dq %s,%s\n",
10681 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
10682 } else {
10683 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10684 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10685 delta += alen;
10686 DIP("%scvt%sps2dq %s,%s\n",
10687 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10690 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
10691 : get_sse_roundingmode() );
10692 t0 = t1 = t2 = t3 = IRTemp_INVALID;
10693 breakupV128to32s( argV, &t3, &t2, &t1, &t0 );
10694 /* This is less than ideal. If it turns out to be a performance
10695 bottleneck it can be improved. */
10696 # define CVT(_t) \
10697 binop( Iop_F64toI32S, \
10698 mkexpr(rmode), \
10699 unop( Iop_F32toF64, \
10700 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
10702 putXMMRegLane32( rG, 3, CVT(t3) );
10703 putXMMRegLane32( rG, 2, CVT(t2) );
10704 putXMMRegLane32( rG, 1, CVT(t1) );
10705 putXMMRegLane32( rG, 0, CVT(t0) );
10706 # undef CVT
10707 if (isAvx)
10708 putYMMRegLane128( rG, 1, mkV128(0) );
10710 return delta;
10714 static Long dis_CVTxPS2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
10715 Long delta, Bool r2zero )
10717 IRTemp addr = IRTemp_INVALID;
10718 Int alen = 0;
10719 HChar dis_buf[50];
10720 UChar modrm = getUChar(delta);
10721 IRTemp argV = newTemp(Ity_V256);
10722 IRTemp rmode = newTemp(Ity_I32);
10723 UInt rG = gregOfRexRM(pfx,modrm);
10724 IRTemp t0, t1, t2, t3, t4, t5, t6, t7;
10726 if (epartIsReg(modrm)) {
10727 UInt rE = eregOfRexRM(pfx,modrm);
10728 assign( argV, getYMMReg(rE) );
10729 delta += 1;
10730 DIP("vcvt%sps2dq %s,%s\n",
10731 r2zero ? "t" : "", nameYMMReg(rE), nameYMMReg(rG));
10732 } else {
10733 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10734 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10735 delta += alen;
10736 DIP("vcvt%sps2dq %s,%s\n",
10737 r2zero ? "t" : "", dis_buf, nameYMMReg(rG) );
10740 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
10741 : get_sse_roundingmode() );
10742 t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = IRTemp_INVALID;
10743 breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 );
10744 /* This is less than ideal. If it turns out to be a performance
10745 bottleneck it can be improved. */
10746 # define CVT(_t) \
10747 binop( Iop_F64toI32S, \
10748 mkexpr(rmode), \
10749 unop( Iop_F32toF64, \
10750 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
10752 putYMMRegLane32( rG, 7, CVT(t7) );
10753 putYMMRegLane32( rG, 6, CVT(t6) );
10754 putYMMRegLane32( rG, 5, CVT(t5) );
10755 putYMMRegLane32( rG, 4, CVT(t4) );
10756 putYMMRegLane32( rG, 3, CVT(t3) );
10757 putYMMRegLane32( rG, 2, CVT(t2) );
10758 putYMMRegLane32( rG, 1, CVT(t1) );
10759 putYMMRegLane32( rG, 0, CVT(t0) );
10760 # undef CVT
10762 return delta;
10766 static Long dis_CVTxPD2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
10767 Long delta, Bool isAvx, Bool r2zero )
10769 IRTemp addr = IRTemp_INVALID;
10770 Int alen = 0;
10771 HChar dis_buf[50];
10772 UChar modrm = getUChar(delta);
10773 IRTemp argV = newTemp(Ity_V128);
10774 IRTemp rmode = newTemp(Ity_I32);
10775 UInt rG = gregOfRexRM(pfx,modrm);
10776 IRTemp t0, t1;
10778 if (epartIsReg(modrm)) {
10779 UInt rE = eregOfRexRM(pfx,modrm);
10780 assign( argV, getXMMReg(rE) );
10781 delta += 1;
10782 DIP("%scvt%spd2dq %s,%s\n",
10783 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
10784 } else {
10785 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10786 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10787 delta += alen;
10788 DIP("%scvt%spd2dqx %s,%s\n",
10789 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10792 if (r2zero) {
10793 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10794 } else {
10795 assign( rmode, get_sse_roundingmode() );
10798 t0 = newTemp(Ity_F64);
10799 t1 = newTemp(Ity_F64);
10800 assign( t0, unop(Iop_ReinterpI64asF64,
10801 unop(Iop_V128to64, mkexpr(argV))) );
10802 assign( t1, unop(Iop_ReinterpI64asF64,
10803 unop(Iop_V128HIto64, mkexpr(argV))) );
10805 # define CVT(_t) binop( Iop_F64toI32S, \
10806 mkexpr(rmode), \
10807 mkexpr(_t) )
10809 putXMMRegLane32( rG, 3, mkU32(0) );
10810 putXMMRegLane32( rG, 2, mkU32(0) );
10811 putXMMRegLane32( rG, 1, CVT(t1) );
10812 putXMMRegLane32( rG, 0, CVT(t0) );
10813 # undef CVT
10814 if (isAvx)
10815 putYMMRegLane128( rG, 1, mkV128(0) );
10817 return delta;
10821 static Long dis_CVTxPD2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
10822 Long delta, Bool r2zero )
10824 IRTemp addr = IRTemp_INVALID;
10825 Int alen = 0;
10826 HChar dis_buf[50];
10827 UChar modrm = getUChar(delta);
10828 IRTemp argV = newTemp(Ity_V256);
10829 IRTemp rmode = newTemp(Ity_I32);
10830 UInt rG = gregOfRexRM(pfx,modrm);
10831 IRTemp t0, t1, t2, t3;
10833 if (epartIsReg(modrm)) {
10834 UInt rE = eregOfRexRM(pfx,modrm);
10835 assign( argV, getYMMReg(rE) );
10836 delta += 1;
10837 DIP("vcvt%spd2dq %s,%s\n",
10838 r2zero ? "t" : "", nameYMMReg(rE), nameXMMReg(rG));
10839 } else {
10840 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10841 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10842 delta += alen;
10843 DIP("vcvt%spd2dqy %s,%s\n",
10844 r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10847 if (r2zero) {
10848 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10849 } else {
10850 assign( rmode, get_sse_roundingmode() );
10853 t0 = IRTemp_INVALID;
10854 t1 = IRTemp_INVALID;
10855 t2 = IRTemp_INVALID;
10856 t3 = IRTemp_INVALID;
10857 breakupV256to64s( argV, &t3, &t2, &t1, &t0 );
10859 # define CVT(_t) binop( Iop_F64toI32S, \
10860 mkexpr(rmode), \
10861 unop( Iop_ReinterpI64asF64, \
10862 mkexpr(_t) ) )
10864 putXMMRegLane32( rG, 3, CVT(t3) );
10865 putXMMRegLane32( rG, 2, CVT(t2) );
10866 putXMMRegLane32( rG, 1, CVT(t1) );
10867 putXMMRegLane32( rG, 0, CVT(t0) );
10868 # undef CVT
10869 putYMMRegLane128( rG, 1, mkV128(0) );
10871 return delta;
10875 static Long dis_CVTDQ2PS_128 ( const VexAbiInfo* vbi, Prefix pfx,
10876 Long delta, Bool isAvx )
10878 IRTemp addr = IRTemp_INVALID;
10879 Int alen = 0;
10880 HChar dis_buf[50];
10881 UChar modrm = getUChar(delta);
10882 IRTemp argV = newTemp(Ity_V128);
10883 IRTemp rmode = newTemp(Ity_I32);
10884 UInt rG = gregOfRexRM(pfx,modrm);
10885 IRTemp t0, t1, t2, t3;
10887 if (epartIsReg(modrm)) {
10888 UInt rE = eregOfRexRM(pfx,modrm);
10889 assign( argV, getXMMReg(rE) );
10890 delta += 1;
10891 DIP("%scvtdq2ps %s,%s\n",
10892 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
10893 } else {
10894 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10895 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10896 delta += alen;
10897 DIP("%scvtdq2ps %s,%s\n",
10898 isAvx ? "v" : "", dis_buf, nameXMMReg(rG) );
10901 assign( rmode, get_sse_roundingmode() );
10902 t0 = IRTemp_INVALID;
10903 t1 = IRTemp_INVALID;
10904 t2 = IRTemp_INVALID;
10905 t3 = IRTemp_INVALID;
10906 breakupV128to32s( argV, &t3, &t2, &t1, &t0 );
10908 # define CVT(_t) binop( Iop_F64toF32, \
10909 mkexpr(rmode), \
10910 unop(Iop_I32StoF64,mkexpr(_t)))
10912 putXMMRegLane32F( rG, 3, CVT(t3) );
10913 putXMMRegLane32F( rG, 2, CVT(t2) );
10914 putXMMRegLane32F( rG, 1, CVT(t1) );
10915 putXMMRegLane32F( rG, 0, CVT(t0) );
10916 # undef CVT
10917 if (isAvx)
10918 putYMMRegLane128( rG, 1, mkV128(0) );
10920 return delta;
10923 static Long dis_CVTDQ2PS_256 ( const VexAbiInfo* vbi, Prefix pfx,
10924 Long delta )
10926 IRTemp addr = IRTemp_INVALID;
10927 Int alen = 0;
10928 HChar dis_buf[50];
10929 UChar modrm = getUChar(delta);
10930 IRTemp argV = newTemp(Ity_V256);
10931 IRTemp rmode = newTemp(Ity_I32);
10932 UInt rG = gregOfRexRM(pfx,modrm);
10933 IRTemp t0, t1, t2, t3, t4, t5, t6, t7;
10935 if (epartIsReg(modrm)) {
10936 UInt rE = eregOfRexRM(pfx,modrm);
10937 assign( argV, getYMMReg(rE) );
10938 delta += 1;
10939 DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
10940 } else {
10941 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10942 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10943 delta += alen;
10944 DIP("vcvtdq2ps %s,%s\n", dis_buf, nameYMMReg(rG) );
10947 assign( rmode, get_sse_roundingmode() );
10948 t0 = IRTemp_INVALID;
10949 t1 = IRTemp_INVALID;
10950 t2 = IRTemp_INVALID;
10951 t3 = IRTemp_INVALID;
10952 t4 = IRTemp_INVALID;
10953 t5 = IRTemp_INVALID;
10954 t6 = IRTemp_INVALID;
10955 t7 = IRTemp_INVALID;
10956 breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 );
10958 # define CVT(_t) binop( Iop_F64toF32, \
10959 mkexpr(rmode), \
10960 unop(Iop_I32StoF64,mkexpr(_t)))
10962 putYMMRegLane32F( rG, 7, CVT(t7) );
10963 putYMMRegLane32F( rG, 6, CVT(t6) );
10964 putYMMRegLane32F( rG, 5, CVT(t5) );
10965 putYMMRegLane32F( rG, 4, CVT(t4) );
10966 putYMMRegLane32F( rG, 3, CVT(t3) );
10967 putYMMRegLane32F( rG, 2, CVT(t2) );
10968 putYMMRegLane32F( rG, 1, CVT(t1) );
10969 putYMMRegLane32F( rG, 0, CVT(t0) );
10970 # undef CVT
10972 return delta;
10976 static Long dis_PMOVMSKB_128 ( const VexAbiInfo* vbi, Prefix pfx,
10977 Long delta, Bool isAvx )
10979 UChar modrm = getUChar(delta);
10980 vassert(epartIsReg(modrm)); /* ensured by caller */
10981 UInt rE = eregOfRexRM(pfx,modrm);
10982 UInt rG = gregOfRexRM(pfx,modrm);
10983 IRTemp t0 = newTemp(Ity_V128);
10984 IRTemp t1 = newTemp(Ity_I32);
10985 assign(t0, getXMMReg(rE));
10986 assign(t1, unop(Iop_16Uto32, unop(Iop_GetMSBs8x16, mkexpr(t0))));
10987 putIReg32(rG, mkexpr(t1));
10988 DIP("%spmovmskb %s,%s\n", isAvx ? "v" : "", nameXMMReg(rE),
10989 nameIReg32(rG));
10990 delta += 1;
10991 return delta;
10995 static Long dis_PMOVMSKB_256 ( const VexAbiInfo* vbi, Prefix pfx,
10996 Long delta )
10998 UChar modrm = getUChar(delta);
10999 vassert(epartIsReg(modrm)); /* ensured by caller */
11000 UInt rE = eregOfRexRM(pfx,modrm);
11001 UInt rG = gregOfRexRM(pfx,modrm);
11002 IRTemp t0 = newTemp(Ity_V128);
11003 IRTemp t1 = newTemp(Ity_V128);
11004 IRTemp t2 = newTemp(Ity_I16);
11005 IRTemp t3 = newTemp(Ity_I16);
11006 assign(t0, getYMMRegLane128(rE, 0));
11007 assign(t1, getYMMRegLane128(rE, 1));
11008 assign(t2, unop(Iop_GetMSBs8x16, mkexpr(t0)));
11009 assign(t3, unop(Iop_GetMSBs8x16, mkexpr(t1)));
11010 putIReg32(rG, binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)));
11011 DIP("vpmovmskb %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
11012 delta += 1;
11013 return delta;
11017 /* FIXME: why not just use InterleaveLO / InterleaveHI? I think the
11018 relevant ops are "xIsH ? InterleaveHI32x4 : InterleaveLO32x4". */
11019 /* Does the maths for 128 bit versions of UNPCKLPS and UNPCKHPS */
11020 static IRTemp math_UNPCKxPS_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
11022 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11023 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11024 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11025 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11026 IRTemp res = newTemp(Ity_V128);
11027 assign(res, xIsH ? mkV128from32s( s3, d3, s2, d2 )
11028 : mkV128from32s( s1, d1, s0, d0 ));
11029 return res;
11033 /* FIXME: why not just use InterleaveLO / InterleaveHI ?? */
11034 /* Does the maths for 128 bit versions of UNPCKLPD and UNPCKHPD */
11035 static IRTemp math_UNPCKxPD_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
11037 IRTemp s1 = newTemp(Ity_I64);
11038 IRTemp s0 = newTemp(Ity_I64);
11039 IRTemp d1 = newTemp(Ity_I64);
11040 IRTemp d0 = newTemp(Ity_I64);
11041 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
11042 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
11043 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
11044 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
11045 IRTemp res = newTemp(Ity_V128);
11046 assign(res, xIsH ? binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1))
11047 : binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)));
11048 return res;
11052 /* Does the maths for 256 bit versions of UNPCKLPD and UNPCKHPD.
11053 Doesn't seem like this fits in either of the Iop_Interleave{LO,HI}
11054 or the Iop_Cat{Odd,Even}Lanes idioms, hence just do it the stupid
11055 way. */
11056 static IRTemp math_UNPCKxPD_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
11058 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11059 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11060 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
11061 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
11062 IRTemp res = newTemp(Ity_V256);
11063 assign(res, xIsH
11064 ? IRExpr_Qop(Iop_64x4toV256, mkexpr(s3), mkexpr(d3),
11065 mkexpr(s1), mkexpr(d1))
11066 : IRExpr_Qop(Iop_64x4toV256, mkexpr(s2), mkexpr(d2),
11067 mkexpr(s0), mkexpr(d0)));
11068 return res;
11072 /* FIXME: this is really bad. Surely can do something better here?
11073 One observation is that the steering in the upper and lower 128 bit
11074 halves is the same as with math_UNPCKxPS_128, so we simply split
11075 into two halves, and use that. Consequently any improvement in
11076 math_UNPCKxPS_128 (probably, to use interleave-style primops)
11077 benefits this too. */
11078 static IRTemp math_UNPCKxPS_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
11080 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11081 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11082 breakupV256toV128s( sV, &sVhi, &sVlo );
11083 breakupV256toV128s( dV, &dVhi, &dVlo );
11084 IRTemp rVhi = math_UNPCKxPS_128(sVhi, dVhi, xIsH);
11085 IRTemp rVlo = math_UNPCKxPS_128(sVlo, dVlo, xIsH);
11086 IRTemp rV = newTemp(Ity_V256);
11087 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11088 return rV;
11092 static IRTemp math_SHUFPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11094 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11095 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11096 vassert(imm8 < 256);
11098 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11099 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11101 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
11102 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11103 IRTemp res = newTemp(Ity_V128);
11104 assign(res,
11105 mkV128from32s( SELS((imm8>>6)&3), SELS((imm8>>4)&3),
11106 SELD((imm8>>2)&3), SELD((imm8>>0)&3) ) );
11107 # undef SELD
11108 # undef SELS
11109 return res;
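/* Worked example (illustrative only): the two low result lanes come
   from dV and the two high ones from sV, each chosen by a 2-bit field
   of imm8.  E.g. imm8 == 0xE4 (binary 11 10 01 00) gives
   ( s3, s2, d1, d0 ), while imm8 == 0x44 (binary 01 00 01 00), as in
   "shufps $0x44, %xmm1, %xmm0", gives ( s1, s0, d1, d0 ). */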
11113 /* 256-bit SHUFPS appears to steer each of the 128-bit halves
11114 identically. Hence do the clueless thing and use math_SHUFPS_128
11115 twice. */
11116 static IRTemp math_SHUFPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11118 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11119 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11120 breakupV256toV128s( sV, &sVhi, &sVlo );
11121 breakupV256toV128s( dV, &dVhi, &dVlo );
11122 IRTemp rVhi = math_SHUFPS_128(sVhi, dVhi, imm8);
11123 IRTemp rVlo = math_SHUFPS_128(sVlo, dVlo, imm8);
11124 IRTemp rV = newTemp(Ity_V256);
11125 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11126 return rV;
11130 static IRTemp math_SHUFPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11132 IRTemp s1 = newTemp(Ity_I64);
11133 IRTemp s0 = newTemp(Ity_I64);
11134 IRTemp d1 = newTemp(Ity_I64);
11135 IRTemp d0 = newTemp(Ity_I64);
11137 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
11138 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
11139 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
11140 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
11142 # define SELD(n) mkexpr((n)==0 ? d0 : d1)
11143 # define SELS(n) mkexpr((n)==0 ? s0 : s1)
11145 IRTemp res = newTemp(Ity_V128);
11146 assign(res, binop( Iop_64HLtoV128,
11147 SELS((imm8>>1)&1), SELD((imm8>>0)&1) ) );
11149 # undef SELD
11150 # undef SELS
11151 return res;
11155 static IRTemp math_SHUFPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11157 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11158 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11159 breakupV256toV128s( sV, &sVhi, &sVlo );
11160 breakupV256toV128s( dV, &dVhi, &dVlo );
11161 IRTemp rVhi = math_SHUFPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
11162 IRTemp rVlo = math_SHUFPD_128(sVlo, dVlo, imm8 & 3);
11163 IRTemp rV = newTemp(Ity_V256);
11164 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11165 return rV;
11169 static IRTemp math_BLENDPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11171 UShort imm8_mask_16;
11172 IRTemp imm8_mask = newTemp(Ity_V128);
11174 switch( imm8 & 3 ) {
11175 case 0: imm8_mask_16 = 0x0000; break;
11176 case 1: imm8_mask_16 = 0x00FF; break;
11177 case 2: imm8_mask_16 = 0xFF00; break;
11178 case 3: imm8_mask_16 = 0xFFFF; break;
11179 default: vassert(0); break;
11181 assign( imm8_mask, mkV128( imm8_mask_16 ) );
11183 IRTemp res = newTemp(Ity_V128);
11184 assign ( res, binop( Iop_OrV128,
11185 binop( Iop_AndV128, mkexpr(sV),
11186 mkexpr(imm8_mask) ),
11187 binop( Iop_AndV128, mkexpr(dV),
11188 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
11189 return res;
11193 static IRTemp math_BLENDPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11195 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11196 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11197 breakupV256toV128s( sV, &sVhi, &sVlo );
11198 breakupV256toV128s( dV, &dVhi, &dVlo );
11199 IRTemp rVhi = math_BLENDPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
11200 IRTemp rVlo = math_BLENDPD_128(sVlo, dVlo, imm8 & 3);
11201 IRTemp rV = newTemp(Ity_V256);
11202 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11203 return rV;
11207 static IRTemp math_BLENDPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11209 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
11210 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
11211 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
11212 0xFFFF };
11213 IRTemp imm8_mask = newTemp(Ity_V128);
11214 assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) );
11216 IRTemp res = newTemp(Ity_V128);
11217 assign ( res, binop( Iop_OrV128,
11218 binop( Iop_AndV128, mkexpr(sV),
11219 mkexpr(imm8_mask) ),
11220 binop( Iop_AndV128, mkexpr(dV),
11221 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
11222 return res;
11226 static IRTemp math_BLENDPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11228 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11229 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11230 breakupV256toV128s( sV, &sVhi, &sVlo );
11231 breakupV256toV128s( dV, &dVhi, &dVlo );
11232 IRTemp rVhi = math_BLENDPS_128(sVhi, dVhi, (imm8 >> 4) & 15);
11233 IRTemp rVlo = math_BLENDPS_128(sVlo, dVlo, imm8 & 15);
11234 IRTemp rV = newTemp(Ity_V256);
11235 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11236 return rV;
11240 static IRTemp math_PBLENDW_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11242    /* Make imm16 be a 16-bit version of imm8, formed by duplicating each
11243       bit in imm8. */
11244 Int i;
11245 UShort imm16 = 0;
11246 for (i = 0; i < 8; i++) {
11247 if (imm8 & (1 << i))
11248 imm16 |= (3 << (2*i));
11250 IRTemp imm16_mask = newTemp(Ity_V128);
11251 assign( imm16_mask, mkV128( imm16 ));
11253 IRTemp res = newTemp(Ity_V128);
11254 assign ( res, binop( Iop_OrV128,
11255 binop( Iop_AndV128, mkexpr(sV),
11256 mkexpr(imm16_mask) ),
11257 binop( Iop_AndV128, mkexpr(dV),
11258 unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) );
11259 return res;
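/* Worked example (illustrative only): each bit of imm8 controls one
   16-bit lane, whereas mkV128's mask argument has one bit per byte, so
   every imm8 bit is doubled.  E.g. imm8 == 0x05 (take lanes 0 and 2
   from sV) becomes imm16 == 0x0033, i.e. bytes 0,1 and 4,5 come from
   sV and the rest from dV. */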
11263 static IRTemp math_PMULUDQ_128 ( IRTemp sV, IRTemp dV )
11265 /* This is a really poor translation -- could be improved if
11266 performance critical */
11267 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11268 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11269 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11270 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11271 IRTemp res = newTemp(Ity_V128);
11272 assign(res, binop(Iop_64HLtoV128,
11273 binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)),
11274 binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ));
11275 return res;
11279 static IRTemp math_PMULUDQ_256 ( IRTemp sV, IRTemp dV )
11281 /* This is a really poor translation -- could be improved if
11282 performance critical */
11283 IRTemp sHi, sLo, dHi, dLo;
11284 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11285 breakupV256toV128s( dV, &dHi, &dLo);
11286 breakupV256toV128s( sV, &sHi, &sLo);
11287 IRTemp res = newTemp(Ity_V256);
11288 assign(res, binop(Iop_V128HLtoV256,
11289 mkexpr(math_PMULUDQ_128(sHi, dHi)),
11290 mkexpr(math_PMULUDQ_128(sLo, dLo))));
11291 return res;
11295 static IRTemp math_PMULDQ_128 ( IRTemp dV, IRTemp sV )
11297 /* This is a really poor translation -- could be improved if
11298 performance critical */
11299 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11300 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11301 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11302 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11303 IRTemp res = newTemp(Ity_V128);
11304 assign(res, binop(Iop_64HLtoV128,
11305 binop( Iop_MullS32, mkexpr(d2), mkexpr(s2)),
11306 binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) ));
11307 return res;
11311 static IRTemp math_PMULDQ_256 ( IRTemp sV, IRTemp dV )
11313 /* This is a really poor translation -- could be improved if
11314 performance critical */
11315 IRTemp sHi, sLo, dHi, dLo;
11316 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11317 breakupV256toV128s( dV, &dHi, &dLo);
11318 breakupV256toV128s( sV, &sHi, &sLo);
11319 IRTemp res = newTemp(Ity_V256);
11320 assign(res, binop(Iop_V128HLtoV256,
11321 mkexpr(math_PMULDQ_128(sHi, dHi)),
11322 mkexpr(math_PMULDQ_128(sLo, dLo))));
11323 return res;
11327 static IRTemp math_PMADDWD_128 ( IRTemp dV, IRTemp sV )
11329 IRTemp sVhi, sVlo, dVhi, dVlo;
11330 IRTemp resHi = newTemp(Ity_I64);
11331 IRTemp resLo = newTemp(Ity_I64);
11332 sVhi = sVlo = dVhi = dVlo = IRTemp_INVALID;
11333 breakupV128to64s( sV, &sVhi, &sVlo );
11334 breakupV128to64s( dV, &dVhi, &dVlo );
11335 assign( resHi, mkIRExprCCall(Ity_I64, 0/*regparms*/,
11336 "amd64g_calculate_mmx_pmaddwd",
11337 &amd64g_calculate_mmx_pmaddwd,
11338 mkIRExprVec_2( mkexpr(sVhi), mkexpr(dVhi))));
11339 assign( resLo, mkIRExprCCall(Ity_I64, 0/*regparms*/,
11340 "amd64g_calculate_mmx_pmaddwd",
11341 &amd64g_calculate_mmx_pmaddwd,
11342 mkIRExprVec_2( mkexpr(sVlo), mkexpr(dVlo))));
11343 IRTemp res = newTemp(Ity_V128);
11344 assign( res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo))) ;
11345 return res;
11349 static IRTemp math_PMADDWD_256 ( IRTemp dV, IRTemp sV )
11351 IRTemp sHi, sLo, dHi, dLo;
11352 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11353 breakupV256toV128s( dV, &dHi, &dLo);
11354 breakupV256toV128s( sV, &sHi, &sLo);
11355 IRTemp res = newTemp(Ity_V256);
11356 assign(res, binop(Iop_V128HLtoV256,
11357 mkexpr(math_PMADDWD_128(dHi, sHi)),
11358 mkexpr(math_PMADDWD_128(dLo, sLo))));
11359 return res;
11363 static IRTemp math_ADDSUBPD_128 ( IRTemp dV, IRTemp sV )
11365 IRTemp addV = newTemp(Ity_V128);
11366 IRTemp subV = newTemp(Ity_V128);
11367 IRTemp a1 = newTemp(Ity_I64);
11368 IRTemp s0 = newTemp(Ity_I64);
11369 IRTemp rm = newTemp(Ity_I32);
11371 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11372 assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11373 assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11375 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
11376 assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
11378 IRTemp res = newTemp(Ity_V128);
11379 assign( res, binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
11380 return res;
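/* So for the 128-bit case the result is ( dV[1] + sV[1] , dV[0] - sV[0] ):
   the odd-numbered lane comes from the addition and the even-numbered
   lane from the subtraction, which is the ADDSUBPD definition.  The
   256-bit and PS variants below follow the same alternating pattern
   via mkV256from64s / mkV128from32s / mkV256from32s. */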
11384 static IRTemp math_ADDSUBPD_256 ( IRTemp dV, IRTemp sV )
11386 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11387 IRTemp addV = newTemp(Ity_V256);
11388 IRTemp subV = newTemp(Ity_V256);
11389 IRTemp rm = newTemp(Ity_I32);
11390 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11392 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11393 assign( addV, triop(Iop_Add64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11394 assign( subV, triop(Iop_Sub64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11396 breakupV256to64s( addV, &a3, &a2, &a1, &a0 );
11397 breakupV256to64s( subV, &s3, &s2, &s1, &s0 );
11399 IRTemp res = newTemp(Ity_V256);
11400 assign( res, mkV256from64s( a3, s2, a1, s0 ) );
11401 return res;
11405 static IRTemp math_ADDSUBPS_128 ( IRTemp dV, IRTemp sV )
11407 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11408 IRTemp addV = newTemp(Ity_V128);
11409 IRTemp subV = newTemp(Ity_V128);
11410 IRTemp rm = newTemp(Ity_I32);
11411 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11413 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11414 assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11415 assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11417 breakupV128to32s( addV, &a3, &a2, &a1, &a0 );
11418 breakupV128to32s( subV, &s3, &s2, &s1, &s0 );
11420 IRTemp res = newTemp(Ity_V128);
11421 assign( res, mkV128from32s( a3, s2, a1, s0 ) );
11422 return res;
11426 static IRTemp math_ADDSUBPS_256 ( IRTemp dV, IRTemp sV )
11428 IRTemp a7, a6, a5, a4, a3, a2, a1, a0;
11429 IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
11430 IRTemp addV = newTemp(Ity_V256);
11431 IRTemp subV = newTemp(Ity_V256);
11432 IRTemp rm = newTemp(Ity_I32);
11433 a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID;
11434 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11436 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11437 assign( addV, triop(Iop_Add32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11438 assign( subV, triop(Iop_Sub32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11440 breakupV256to32s( addV, &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0 );
11441 breakupV256to32s( subV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
11443 IRTemp res = newTemp(Ity_V256);
11444 assign( res, mkV256from32s( a7, s6, a5, s4, a3, s2, a1, s0 ) );
11445 return res;
11449 /* Handle 128 bit PSHUFLW and PSHUFHW. */
11450 static Long dis_PSHUFxW_128 ( const VexAbiInfo* vbi, Prefix pfx,
11451 Long delta, Bool isAvx, Bool xIsH )
11453 IRTemp addr = IRTemp_INVALID;
11454 Int alen = 0;
11455 HChar dis_buf[50];
11456 UChar modrm = getUChar(delta);
11457 UInt rG = gregOfRexRM(pfx,modrm);
11458 UInt imm8;
11459 IRTemp sVmut, dVmut, sVcon, sV, dV, s3, s2, s1, s0;
11460 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11461 sV = newTemp(Ity_V128);
11462 dV = newTemp(Ity_V128);
11463 sVmut = newTemp(Ity_I64);
11464 dVmut = newTemp(Ity_I64);
11465 sVcon = newTemp(Ity_I64);
11466 if (epartIsReg(modrm)) {
11467 UInt rE = eregOfRexRM(pfx,modrm);
11468 assign( sV, getXMMReg(rE) );
11469 imm8 = (UInt)getUChar(delta+1);
11470 delta += 1+1;
11471 DIP("%spshuf%cw $%u,%s,%s\n",
11472 isAvx ? "v" : "", xIsH ? 'h' : 'l',
11473 imm8, nameXMMReg(rE), nameXMMReg(rG));
11474 } else {
11475 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
11476 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11477 imm8 = (UInt)getUChar(delta+alen);
11478 delta += alen+1;
11479 DIP("%spshuf%cw $%u,%s,%s\n",
11480 isAvx ? "v" : "", xIsH ? 'h' : 'l',
11481 imm8, dis_buf, nameXMMReg(rG));
11484 /* Get the to-be-changed (mut) and unchanging (con) bits of the
11485 source. */
11486 assign( sVmut, unop(xIsH ? Iop_V128HIto64 : Iop_V128to64, mkexpr(sV)) );
11487 assign( sVcon, unop(xIsH ? Iop_V128to64 : Iop_V128HIto64, mkexpr(sV)) );
11489 breakup64to16s( sVmut, &s3, &s2, &s1, &s0 );
11490 # define SEL(n) \
11491 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11492 assign(dVmut, mk64from16s( SEL((imm8>>6)&3), SEL((imm8>>4)&3),
11493 SEL((imm8>>2)&3), SEL((imm8>>0)&3) ));
11494 # undef SEL
11496 assign(dV, xIsH ? binop(Iop_64HLtoV128, mkexpr(dVmut), mkexpr(sVcon))
11497 : binop(Iop_64HLtoV128, mkexpr(sVcon), mkexpr(dVmut)) );
11499 (isAvx ? putYMMRegLoAndZU : putXMMReg)(rG, mkexpr(dV));
11500 return delta;
11504 /* Handle 256 bit PSHUFLW and PSHUFHW. */
11505 static Long dis_PSHUFxW_256 ( const VexAbiInfo* vbi, Prefix pfx,
11506 Long delta, Bool xIsH )
11508 IRTemp addr = IRTemp_INVALID;
11509 Int alen = 0;
11510 HChar dis_buf[50];
11511 UChar modrm = getUChar(delta);
11512 UInt rG = gregOfRexRM(pfx,modrm);
11513 UInt imm8;
11514 IRTemp sV, s[8], sV64[4], dVhi, dVlo;
11515 sV64[3] = sV64[2] = sV64[1] = sV64[0] = IRTemp_INVALID;
11516 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
11517 sV = newTemp(Ity_V256);
11518 dVhi = newTemp(Ity_I64);
11519 dVlo = newTemp(Ity_I64);
11520 if (epartIsReg(modrm)) {
11521 UInt rE = eregOfRexRM(pfx,modrm);
11522 assign( sV, getYMMReg(rE) );
11523 imm8 = (UInt)getUChar(delta+1);
11524 delta += 1+1;
11525 DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
11526 imm8, nameYMMReg(rE), nameYMMReg(rG));
11527 } else {
11528 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
11529 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
11530 imm8 = (UInt)getUChar(delta+alen);
11531 delta += alen+1;
11532 DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
11533 imm8, dis_buf, nameYMMReg(rG));
11536 breakupV256to64s( sV, &sV64[3], &sV64[2], &sV64[1], &sV64[0] );
11537 breakup64to16s( sV64[xIsH ? 3 : 2], &s[7], &s[6], &s[5], &s[4] );
11538 breakup64to16s( sV64[xIsH ? 1 : 0], &s[3], &s[2], &s[1], &s[0] );
11540 assign( dVhi, mk64from16s( s[4 + ((imm8>>6)&3)], s[4 + ((imm8>>4)&3)],
11541 s[4 + ((imm8>>2)&3)], s[4 + ((imm8>>0)&3)] ) );
11542 assign( dVlo, mk64from16s( s[0 + ((imm8>>6)&3)], s[0 + ((imm8>>4)&3)],
11543 s[0 + ((imm8>>2)&3)], s[0 + ((imm8>>0)&3)] ) );
11544 putYMMReg( rG, mkV256from64s( xIsH ? dVhi : sV64[3],
11545 xIsH ? sV64[2] : dVhi,
11546 xIsH ? dVlo : sV64[1],
11547 xIsH ? sV64[0] : dVlo ) );
11548 return delta;
11552 static Long dis_PEXTRW_128_EregOnly_toG ( const VexAbiInfo* vbi, Prefix pfx,
11553 Long delta, Bool isAvx )
11555 Long deltaIN = delta;
11556 UChar modrm = getUChar(delta);
11557 UInt rG = gregOfRexRM(pfx,modrm);
11558 IRTemp sV = newTemp(Ity_V128);
11559 IRTemp d16 = newTemp(Ity_I16);
11560 UInt imm8;
11561 IRTemp s0, s1, s2, s3;
11562 if (epartIsReg(modrm)) {
11563 UInt rE = eregOfRexRM(pfx,modrm);
11564 assign(sV, getXMMReg(rE));
11565 imm8 = getUChar(delta+1) & 7;
11566 delta += 1+1;
11567 DIP("%spextrw $%u,%s,%s\n", isAvx ? "v" : "",
11568 imm8, nameXMMReg(rE), nameIReg32(rG));
11569 } else {
11570 /* The memory case is disallowed, apparently. */
11571 return deltaIN; /* FAIL */
11573 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11574 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11575 switch (imm8) {
11576 case 0: assign(d16, unop(Iop_32to16, mkexpr(s0))); break;
11577 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(s0))); break;
11578 case 2: assign(d16, unop(Iop_32to16, mkexpr(s1))); break;
11579 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(s1))); break;
11580 case 4: assign(d16, unop(Iop_32to16, mkexpr(s2))); break;
11581 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(s2))); break;
11582 case 6: assign(d16, unop(Iop_32to16, mkexpr(s3))); break;
11583 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(s3))); break;
11584 default: vassert(0);
11586 putIReg32(rG, unop(Iop_16Uto32, mkexpr(d16)));
11587 return delta;
11591 static Long dis_CVTDQ2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
11592 Long delta, Bool isAvx )
11594 IRTemp addr = IRTemp_INVALID;
11595 Int alen = 0;
11596 HChar dis_buf[50];
11597 UChar modrm = getUChar(delta);
11598 IRTemp arg64 = newTemp(Ity_I64);
11599 UInt rG = gregOfRexRM(pfx,modrm);
11600 const HChar* mbV = isAvx ? "v" : "";
11601 if (epartIsReg(modrm)) {
11602 UInt rE = eregOfRexRM(pfx,modrm);
11603 assign( arg64, getXMMRegLane64(rE, 0) );
11604 delta += 1;
11605 DIP("%scvtdq2pd %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
11606 } else {
11607 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11608 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
11609 delta += alen;
11610 DIP("%scvtdq2pd %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
11612 putXMMRegLane64F(
11613 rG, 0,
11614 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
11616 putXMMRegLane64F(
11617 rG, 1,
11618 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
11620 if (isAvx)
11621 putYMMRegLane128(rG, 1, mkV128(0));
11622 return delta;
11626 static Long dis_STMXCSR ( const VexAbiInfo* vbi, Prefix pfx,
11627 Long delta, Bool isAvx )
11629 IRTemp addr = IRTemp_INVALID;
11630 Int alen = 0;
11631 HChar dis_buf[50];
11632 UChar modrm = getUChar(delta);
11633 vassert(!epartIsReg(modrm)); /* ensured by caller */
11634 vassert(gregOfRexRM(pfx,modrm) == 3); /* ditto */
11636 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11637 delta += alen;
11639 /* Fake up a native SSE mxcsr word. The only thing it depends on
11640       is SSEROUND[1:0], so call a clean helper to cook it up. */
11642    /* ULong amd64g_create_mxcsr ( ULong sseround ) */
11643 DIP("%sstmxcsr %s\n", isAvx ? "v" : "", dis_buf);
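      /* In a real MXCSR the rounding-control field occupies bits 14:13
         (00 = nearest, 01 = down, 10 = up, 11 = towards zero); everything
         else in the synthesised word is filled in with default values by
         the helper. */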
11644 storeLE(
11645 mkexpr(addr),
11646 unop(Iop_64to32,
11647 mkIRExprCCall(
11648 Ity_I64, 0/*regp*/,
11649 "amd64g_create_mxcsr", &amd64g_create_mxcsr,
11650 mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) )
11654 return delta;
11658 static Long dis_LDMXCSR ( const VexAbiInfo* vbi, Prefix pfx,
11659 Long delta, Bool isAvx )
11661 IRTemp addr = IRTemp_INVALID;
11662 Int alen = 0;
11663 HChar dis_buf[50];
11664 UChar modrm = getUChar(delta);
11665 vassert(!epartIsReg(modrm)); /* ensured by caller */
11666 vassert(gregOfRexRM(pfx,modrm) == 2); /* ditto */
11668 IRTemp t64 = newTemp(Ity_I64);
11669 IRTemp ew = newTemp(Ity_I32);
11671 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11672 delta += alen;
11673 DIP("%sldmxcsr %s\n", isAvx ? "v" : "", dis_buf);
11675 /* The only thing we observe in %mxcsr is the rounding mode.
11676 Therefore, pass the 32-bit value (SSE native-format control
11677 word) to a clean helper, getting back a 64-bit value, the
11678 lower half of which is the SSEROUND value to store, and the
11679 upper half of which is the emulation-warning token which may
11680       be generated. */
11682    /* ULong amd64g_check_ldmxcsr ( ULong ); */
11683 assign( t64, mkIRExprCCall(
11684 Ity_I64, 0/*regparms*/,
11685 "amd64g_check_ldmxcsr",
11686 &amd64g_check_ldmxcsr,
11687 mkIRExprVec_1(
11688 unop(Iop_32Uto64,
11689 loadLE(Ity_I32, mkexpr(addr))
11695 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
11696 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
11697 put_emwarn( mkexpr(ew) );
11698 /* Finally, if an emulation warning was reported, side-exit to
11699 the next insn, reporting the warning, so that Valgrind's
11700 dispatcher sees the warning. */
11701 stmt(
11702 IRStmt_Exit(
11703 binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)),
11704 Ijk_EmWarn,
11705 IRConst_U64(guest_RIP_bbstart+delta),
11706 OFFB_RIP
11709 return delta;
11713 static void gen_XSAVE_SEQUENCE ( IRTemp addr, IRTemp rfbm )
11715 /* ------ rfbm[0] gates the x87 state ------ */
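   /* A quick map of the XSAVE-area offsets used by this sequence:
      bytes 0..159 hold the legacy x87/MXCSR image, bytes 160..415 the
      XMM registers (16 x 16 bytes), the XSAVE header begins at byte 512,
      and the AVX component (the upper 128-bit halves of YMM0..15) is
      stored from byte 576 onwards. */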
11717 /* Uses dirty helper:
11718       void amd64g_do_XSAVE_COMPONENT_0 ( VexGuestAMD64State*, ULong ) */
11720 IRDirty* d0 = unsafeIRDirty_0_N (
11721 0/*regparms*/,
11722 "amd64g_dirtyhelper_XSAVE_COMPONENT_0",
11723 &amd64g_dirtyhelper_XSAVE_COMPONENT_0,
11724 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
11726 d0->guard = binop(Iop_CmpEQ64, binop(Iop_And64, mkexpr(rfbm), mkU64(1)),
11727 mkU64(1));
11729 /* Declare we're writing memory. Really, bytes 24 through 31
11730 (MXCSR and MXCSR_MASK) aren't written, but we can't express more
11731 than 1 memory area here, so just mark the whole thing as
11732 written. */
11733 d0->mFx = Ifx_Write;
11734 d0->mAddr = mkexpr(addr);
11735 d0->mSize = 160;
11737 /* declare we're reading guest state */
11738 d0->nFxState = 5;
11739 vex_bzero(&d0->fxState, sizeof(d0->fxState));
11741 d0->fxState[0].fx = Ifx_Read;
11742 d0->fxState[0].offset = OFFB_FTOP;
11743 d0->fxState[0].size = sizeof(UInt);
11745 d0->fxState[1].fx = Ifx_Read;
11746 d0->fxState[1].offset = OFFB_FPREGS;
11747 d0->fxState[1].size = 8 * sizeof(ULong);
11749 d0->fxState[2].fx = Ifx_Read;
11750 d0->fxState[2].offset = OFFB_FPTAGS;
11751 d0->fxState[2].size = 8 * sizeof(UChar);
11753 d0->fxState[3].fx = Ifx_Read;
11754 d0->fxState[3].offset = OFFB_FPROUND;
11755 d0->fxState[3].size = sizeof(ULong);
11757 d0->fxState[4].fx = Ifx_Read;
11758 d0->fxState[4].offset = OFFB_FC3210;
11759 d0->fxState[4].size = sizeof(ULong);
11761 stmt( IRStmt_Dirty(d0) );
11763 /* ------ rfbm[1] gates the SSE state ------ */
11765 IRTemp rfbm_1 = newTemp(Ity_I64);
11766 IRTemp rfbm_1or2 = newTemp(Ity_I64);
11767 assign(rfbm_1, binop(Iop_And64, mkexpr(rfbm), mkU64(2)));
11768 assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6)));
11770 IRExpr* guard_1 = binop(Iop_CmpEQ64, mkexpr(rfbm_1), mkU64(2));
11771 IRExpr* guard_1or2 = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0));
11773 /* Uses dirty helper:
11774 void amd64g_do_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
11775 ( VexGuestAMD64State*, ULong )
11776 This creates only MXCSR and MXCSR_MASK. We need to do this if
11777 either components 1 (SSE) or 2 (AVX) are requested. Hence the
11778       guard condition is a bit more complex. */
11780 IRDirty* d1 = unsafeIRDirty_0_N (
11781 0/*regparms*/,
11782 "amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS",
11783 &amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS,
11784 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
11786 d1->guard = guard_1or2;
11788 /* Declare we're writing memory: MXCSR and MXCSR_MASK. Note that
11789       the code for rfbm[0] just above claims a write of 0 .. 159, so
11790 this duplicates it. But at least correctly connects 24 .. 31 to
11791 the MXCSR guest state representation (SSEROUND field). */
11792 d1->mFx = Ifx_Write;
11793 d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24));
11794 d1->mSize = 8;
11796 /* declare we're reading guest state */
11797 d1->nFxState = 1;
11798 vex_bzero(&d1->fxState, sizeof(d1->fxState));
11800 d1->fxState[0].fx = Ifx_Read;
11801 d1->fxState[0].offset = OFFB_SSEROUND;
11802 d1->fxState[0].size = sizeof(ULong);
11804 /* Call the helper. This creates MXCSR and MXCSR_MASK but nothing
11805 else. We do the actual register array, XMM[0..15], separately,
11806 in order that any undefinedness in the XMM registers is tracked
11807 separately by Memcheck and does not "infect" the in-memory
11808 shadow for the other parts of the image. */
11809 stmt( IRStmt_Dirty(d1) );
11811 /* And now the XMMs themselves. */
11812 UInt reg;
11813 for (reg = 0; reg < 16; reg++) {
11814 stmt( IRStmt_StoreG(
11815 Iend_LE,
11816 binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16)),
11817 getXMMReg(reg),
11818 guard_1
11822 /* ------ rfbm[2] gates the AVX state ------ */
11823 /* Component 2 is just a bunch of register saves, so we'll do it
11824 inline, just to be simple and to be Memcheck friendly. */
11826 IRTemp rfbm_2 = newTemp(Ity_I64);
11827 assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4)));
11829 IRExpr* guard_2 = binop(Iop_CmpEQ64, mkexpr(rfbm_2), mkU64(4));
11831 for (reg = 0; reg < 16; reg++) {
11832 stmt( IRStmt_StoreG(
11833 Iend_LE,
11834 binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16)),
11835 getYMMRegLane128(reg,1),
11836 guard_2
11842 static Long dis_XSAVE ( const VexAbiInfo* vbi,
11843 Prefix pfx, Long delta, Int sz )
11845 /* Note that the presence or absence of REX.W (indicated here by
11846 |sz|) slightly affects the written format: whether the saved FPU
11847 IP and DP pointers are 64 or 32 bits. But the helper function
11848 we call simply writes zero bits in the relevant fields, which
11849 are 64 bits regardless of what REX.W is, and so it's good enough
11850 (iow, equally broken) in both cases. */
11851 IRTemp addr = IRTemp_INVALID;
11852 Int alen = 0;
11853 HChar dis_buf[50];
11854 UChar modrm = getUChar(delta);
11855 vassert(!epartIsReg(modrm)); /* ensured by caller */
11856 vassert(sz == 4 || sz == 8); /* ditto */
11858 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11859 delta += alen;
11860 gen_SEGV_if_not_64_aligned(addr);
11862 DIP("%sxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
11864 /* VEX's caller is assumed to have checked this. */
11865 const ULong aSSUMED_XCR0_VALUE = 7;
11867 IRTemp rfbm = newTemp(Ity_I64);
11868 assign(rfbm,
11869 binop(Iop_And64,
11870 binop(Iop_Or64,
11871 binop(Iop_Shl64,
11872 unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)),
11873 unop(Iop_32Uto64, getIRegRAX(4))),
11874 mkU64(aSSUMED_XCR0_VALUE)));
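   /* RFBM ("requested feature bitmap") is EDX:EAX ANDed with XCR0.  With
      the assumed XCR0 value of 7, only bit 0 (x87), bit 1 (SSE) and
      bit 2 (AVX) can survive the masking. */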
11876 gen_XSAVE_SEQUENCE(addr, rfbm);
11878 /* Finally, we need to update XSTATE_BV in the XSAVE header area, by
11879 OR-ing the RFBM value into it. */
11880 IRTemp addr_plus_512 = newTemp(Ity_I64);
11881 assign(addr_plus_512, binop(Iop_Add64, mkexpr(addr), mkU64(512)));
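   /* Since rfbm was masked with the assumed XCR0 value of 7, only the
      lowest byte of XSTATE_BV can change, so an 8-bit load/OR/store
      suffices here. */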
11882 storeLE( mkexpr(addr_plus_512),
11883 binop(Iop_Or8,
11884 unop(Iop_64to8, mkexpr(rfbm)),
11885 loadLE(Ity_I8, mkexpr(addr_plus_512))) );
11887 return delta;
11891 static Long dis_FXSAVE ( const VexAbiInfo* vbi,
11892 Prefix pfx, Long delta, Int sz )
11894 /* See comment in dis_XSAVE about the significance of REX.W. */
11895 IRTemp addr = IRTemp_INVALID;
11896 Int alen = 0;
11897 HChar dis_buf[50];
11898 UChar modrm = getUChar(delta);
11899 vassert(!epartIsReg(modrm)); /* ensured by caller */
11900 vassert(sz == 4 || sz == 8); /* ditto */
11902 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11903 delta += alen;
11904 gen_SEGV_if_not_16_aligned(addr);
11906 DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
11908 /* FXSAVE is just XSAVE with components 0 and 1 selected. Set rfbm
11909 to 0b011, generate the XSAVE sequence accordingly, and let iropt
11910 fold out the unused (AVX) parts accordingly. */
11911 IRTemp rfbm = newTemp(Ity_I64);
11912 assign(rfbm, mkU64(3));
11913 gen_XSAVE_SEQUENCE(addr, rfbm);
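   /* Unlike XSAVE proper, the FXSAVE image has no XSAVE header, so there
      is no XSTATE_BV field to update here. */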
11915 return delta;
11919 static void gen_XRSTOR_SEQUENCE ( IRTemp addr, IRTemp xstate_bv, IRTemp rfbm )
11921 /* ------ rfbm[0] gates the x87 state ------ */
11923 /* If rfbm[0] == 1, we have to write the x87 state. If
11924 xstate_bv[0] == 1, we will read it from the memory image, else
11925 we'll set it to initial values. Doing this with a helper
11926 function and getting the definedness flow annotations correct is
11927 too difficult, so generate stupid but simple code: first set the
11928 registers to initial values, regardless of xstate_bv[0]. Then,
11929 conditionally restore from the memory image. */
11931 IRTemp rfbm_0 = newTemp(Ity_I64);
11932 IRTemp xstate_bv_0 = newTemp(Ity_I64);
11933 IRTemp restore_0 = newTemp(Ity_I64);
11934 assign(rfbm_0, binop(Iop_And64, mkexpr(rfbm), mkU64(1)));
11935 assign(xstate_bv_0, binop(Iop_And64, mkexpr(xstate_bv), mkU64(1)));
11936 assign(restore_0, binop(Iop_And64, mkexpr(rfbm_0), mkexpr(xstate_bv_0)));
11938 gen_FINIT_SEQUENCE( binop(Iop_CmpNE64, mkexpr(rfbm_0), mkU64(0)) );
11940 /* Uses dirty helper:
11941       void amd64g_do_XRSTOR_COMPONENT_0 ( VexGuestAMD64State*, ULong ) */
11943 IRDirty* d0 = unsafeIRDirty_0_N (
11944 0/*regparms*/,
11945 "amd64g_dirtyhelper_XRSTOR_COMPONENT_0",
11946 &amd64g_dirtyhelper_XRSTOR_COMPONENT_0,
11947 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
11949 d0->guard = binop(Iop_CmpNE64, mkexpr(restore_0), mkU64(0));
11951 /* Declare we're reading memory. Really, bytes 24 through 31
11952 (MXCSR and MXCSR_MASK) aren't read, but we can't express more
11953 than 1 memory area here, so just mark the whole thing as
11954 read. */
11955 d0->mFx = Ifx_Read;
11956 d0->mAddr = mkexpr(addr);
11957 d0->mSize = 160;
11959 /* declare we're writing guest state */
11960 d0->nFxState = 5;
11961 vex_bzero(&d0->fxState, sizeof(d0->fxState));
11963 d0->fxState[0].fx = Ifx_Write;
11964 d0->fxState[0].offset = OFFB_FTOP;
11965 d0->fxState[0].size = sizeof(UInt);
11967 d0->fxState[1].fx = Ifx_Write;
11968 d0->fxState[1].offset = OFFB_FPREGS;
11969 d0->fxState[1].size = 8 * sizeof(ULong);
11971 d0->fxState[2].fx = Ifx_Write;
11972 d0->fxState[2].offset = OFFB_FPTAGS;
11973 d0->fxState[2].size = 8 * sizeof(UChar);
11975 d0->fxState[3].fx = Ifx_Write;
11976 d0->fxState[3].offset = OFFB_FPROUND;
11977 d0->fxState[3].size = sizeof(ULong);
11979 d0->fxState[4].fx = Ifx_Write;
11980 d0->fxState[4].offset = OFFB_FC3210;
11981 d0->fxState[4].size = sizeof(ULong);
11983 stmt( IRStmt_Dirty(d0) );
11985 /* ------ rfbm[1] gates the SSE state ------ */
11987 /* Same scheme as component 0: first zero it out, and then possibly
11988 restore from the memory area. */
11989 IRTemp rfbm_1 = newTemp(Ity_I64);
11990 IRTemp xstate_bv_1 = newTemp(Ity_I64);
11991 IRTemp restore_1 = newTemp(Ity_I64);
11992 assign(rfbm_1, binop(Iop_And64, mkexpr(rfbm), mkU64(2)));
11993 assign(xstate_bv_1, binop(Iop_And64, mkexpr(xstate_bv), mkU64(2)));
11994 assign(restore_1, binop(Iop_And64, mkexpr(rfbm_1), mkexpr(xstate_bv_1)));
11995 IRExpr* rfbm_1e = binop(Iop_CmpNE64, mkexpr(rfbm_1), mkU64(0));
11996 IRExpr* restore_1e = binop(Iop_CmpNE64, mkexpr(restore_1), mkU64(0));
11998 IRTemp rfbm_1or2 = newTemp(Ity_I64);
11999 IRTemp xstate_bv_1or2 = newTemp(Ity_I64);
12000 IRTemp restore_1or2 = newTemp(Ity_I64);
12001 assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6)));
12002 assign(xstate_bv_1or2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(6)));
12003 assign(restore_1or2, binop(Iop_And64, mkexpr(rfbm_1or2),
12004 mkexpr(xstate_bv_1or2)));
12005 IRExpr* rfbm_1or2e = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0));
12006 IRExpr* restore_1or2e = binop(Iop_CmpNE64, mkexpr(restore_1or2), mkU64(0));
12008 /* The areas in question are: SSEROUND, and the XMM register array. */
12009 putGuarded(OFFB_SSEROUND, rfbm_1or2e, mkU64(Irrm_NEAREST));
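   /* If restoration of a component is requested by rfbm but the
      corresponding XSTATE_BV bit is clear, the component ends up in its
      initial form -- round-nearest and all-zero XMM registers -- because
      the init-value puts here and in the loop below are only overridden
      by the conditional restores further down.  That matches the
      hardware's "init state" behaviour for these components. */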
12011 UInt reg;
12012 for (reg = 0; reg < 16; reg++) {
12013 putGuarded(xmmGuestRegOffset(reg), rfbm_1e, mkV128(0));
12016 /* And now possibly restore from MXCSR/MXCSR_MASK */
12017 /* Uses dirty helper:
12018 void amd64g_do_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
12019 ( VexGuestAMD64State*, ULong )
12020 This restores from only MXCSR and MXCSR_MASK. We need to do
12021 this if either components 1 (SSE) or 2 (AVX) are requested.
12022       Hence the guard condition is a bit more complex. */
12024 IRDirty* d1 = unsafeIRDirty_0_N (
12025 0/*regparms*/,
12026 "amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS",
12027 &amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS,
12028 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
12030 d1->guard = restore_1or2e;
12032 /* Declare we're reading memory: MXCSR and MXCSR_MASK. Note that
12033       the code for rfbm[0] just above claims a read of 0 .. 159, so
12034 this duplicates it. But at least correctly connects 24 .. 31 to
12035 the MXCSR guest state representation (SSEROUND field). */
12036 d1->mFx = Ifx_Read;
12037 d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24));
12038 d1->mSize = 8;
12040 /* declare we're writing guest state */
12041 d1->nFxState = 1;
12042 vex_bzero(&d1->fxState, sizeof(d1->fxState));
12044 d1->fxState[0].fx = Ifx_Write;
12045 d1->fxState[0].offset = OFFB_SSEROUND;
12046 d1->fxState[0].size = sizeof(ULong);
12048 /* Call the helper. This creates SSEROUND but nothing
12049 else. We do the actual register array, XMM[0..15], separately,
12050 in order that any undefinedness in the XMM registers is tracked
12051 separately by Memcheck and is not "infected" by the in-memory
12052 shadow for the other parts of the image. */
12053 stmt( IRStmt_Dirty(d1) );
12055 /* And now the XMMs themselves. For each register, we PUT either
12056 its old value, or the value loaded from memory. One convenient
12057       way to do that is with a conditional load whose default value
12058       is the old value of the register. */
12059 for (reg = 0; reg < 16; reg++) {
12060 IRExpr* ea = binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16));
12061 IRExpr* alt = getXMMReg(reg);
12062 IRTemp loadedValue = newTemp(Ity_V128);
12063 stmt( IRStmt_LoadG(Iend_LE,
12064 ILGop_IdentV128,
12065 loadedValue, ea, alt, restore_1e) );
12066 putXMMReg(reg, mkexpr(loadedValue));
12069 /* ------ rfbm[2] gates the AVX state ------ */
12070 /* Component 2 is just a bunch of register loads, so we'll do it
12071 inline, just to be simple and to be Memcheck friendly. */
12073 /* Same scheme as component 0: first zero it out, and then possibly
12074 restore from the memory area. */
12075 IRTemp rfbm_2 = newTemp(Ity_I64);
12076 IRTemp xstate_bv_2 = newTemp(Ity_I64);
12077 IRTemp restore_2 = newTemp(Ity_I64);
12078 assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4)));
12079 assign(xstate_bv_2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(4)));
12080 assign(restore_2, binop(Iop_And64, mkexpr(rfbm_2), mkexpr(xstate_bv_2)));
12082 IRExpr* rfbm_2e = binop(Iop_CmpNE64, mkexpr(rfbm_2), mkU64(0));
12083 IRExpr* restore_2e = binop(Iop_CmpNE64, mkexpr(restore_2), mkU64(0));
12085 for (reg = 0; reg < 16; reg++) {
12086 putGuarded(ymmGuestRegLane128offset(reg, 1), rfbm_2e, mkV128(0));
12089 for (reg = 0; reg < 16; reg++) {
12090 IRExpr* ea = binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16));
12091 IRExpr* alt = getYMMRegLane128(reg, 1);
12092 IRTemp loadedValue = newTemp(Ity_V128);
12093 stmt( IRStmt_LoadG(Iend_LE,
12094 ILGop_IdentV128,
12095 loadedValue, ea, alt, restore_2e) );
12096 putYMMRegLane128(reg, 1, mkexpr(loadedValue));
12101 static Long dis_XRSTOR ( const VexAbiInfo* vbi,
12102 Prefix pfx, Long delta, Int sz )
12104    /* As with XSAVE above we ignore the value of REX.W since we're
12105 not bothering with the FPU DP and IP fields. */
12106 IRTemp addr = IRTemp_INVALID;
12107 Int alen = 0;
12108 HChar dis_buf[50];
12109 UChar modrm = getUChar(delta);
12110 vassert(!epartIsReg(modrm)); /* ensured by caller */
12111 vassert(sz == 4 || sz == 8); /* ditto */
12113 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12114 delta += alen;
12115 gen_SEGV_if_not_64_aligned(addr);
12117 DIP("%sxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
12119 /* VEX's caller is assumed to have checked this. */
12120 const ULong aSSUMED_XCR0_VALUE = 7;
12122 IRTemp rfbm = newTemp(Ity_I64);
12123 assign(rfbm,
12124 binop(Iop_And64,
12125 binop(Iop_Or64,
12126 binop(Iop_Shl64,
12127 unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)),
12128 unop(Iop_32Uto64, getIRegRAX(4))),
12129 mkU64(aSSUMED_XCR0_VALUE)));
12131 IRTemp xstate_bv = newTemp(Ity_I64);
12132 assign(xstate_bv, loadLE(Ity_I64,
12133 binop(Iop_Add64, mkexpr(addr), mkU64(512+0))));
12135 IRTemp xcomp_bv = newTemp(Ity_I64);
12136 assign(xcomp_bv, loadLE(Ity_I64,
12137 binop(Iop_Add64, mkexpr(addr), mkU64(512+8))));
12139 IRTemp xsavehdr_23_16 = newTemp(Ity_I64);
12140 assign( xsavehdr_23_16,
12141 loadLE(Ity_I64,
12142 binop(Iop_Add64, mkexpr(addr), mkU64(512+16))));
12144 /* We must fault if
12145 * xcomp_bv[63] == 1, since this simulated CPU does not support
12146 the compaction extension.
12147 * xstate_bv sets a bit outside of XCR0 (which we assume to be 7).
12148 * any of the xsave header bytes 23 .. 8 are nonzero. This seems to
12149 imply that xcomp_bv must be zero.
12150       xcomp_bv is header bytes 15 .. 8 and xstate_bv is header bytes 7 .. 0. */
12152 IRTemp fault_if_nonzero = newTemp(Ity_I64);
12153 assign(fault_if_nonzero,
12154 binop(Iop_Or64,
12155 binop(Iop_And64, mkexpr(xstate_bv), mkU64(~aSSUMED_XCR0_VALUE)),
12156 binop(Iop_Or64, mkexpr(xcomp_bv), mkexpr(xsavehdr_23_16))));
12157 stmt( IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(fault_if_nonzero), mkU64(0)),
12158 Ijk_SigSEGV,
12159 IRConst_U64(guest_RIP_curr_instr),
12160 OFFB_RIP
12163 /* We are guaranteed now that both xstate_bv and rfbm are in the
12164 range 0 .. 7. Generate the restore sequence proper. */
12165 gen_XRSTOR_SEQUENCE(addr, xstate_bv, rfbm);
12167 return delta;
12171 static Long dis_FXRSTOR ( const VexAbiInfo* vbi,
12172 Prefix pfx, Long delta, Int sz )
12174 /* As with FXSAVE above we ignore the value of REX.W since we're
12175 not bothering with the FPU DP and IP fields. */
12176 IRTemp addr = IRTemp_INVALID;
12177 Int alen = 0;
12178 HChar dis_buf[50];
12179 UChar modrm = getUChar(delta);
12180 vassert(!epartIsReg(modrm)); /* ensured by caller */
12181 vassert(sz == 4 || sz == 8); /* ditto */
12183 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12184 delta += alen;
12185 gen_SEGV_if_not_16_aligned(addr);
12187 DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
12189 /* FXRSTOR is just XRSTOR with components 0 and 1 selected and also
12190 as if components 0 and 1 are set as present in XSTATE_BV in the
12191 XSAVE header. Set both rfbm and xstate_bv to 0b011 therefore,
12192 generate the XRSTOR sequence accordingly, and let iropt fold out
12193 the unused (AVX) parts accordingly. */
12194 IRTemp three = newTemp(Ity_I64);
12195 assign(three, mkU64(3));
12196 gen_XRSTOR_SEQUENCE(addr, three/*xstate_bv*/, three/*rfbm*/);
12198 return delta;
12202 static IRTemp math_PINSRW_128 ( IRTemp v128, IRTemp u16, UInt imm8 )
12204 vassert(imm8 >= 0 && imm8 <= 7);
12206 // Create a V128 value which has the selected word in the
12207 // specified lane, and zeroes everywhere else.
12208 IRTemp tmp128 = newTemp(Ity_V128);
12209 IRTemp halfshift = newTemp(Ity_I64);
12210 assign(halfshift, binop(Iop_Shl64,
12211 unop(Iop_16Uto64, mkexpr(u16)),
12212 mkU8(16 * (imm8 & 3))));
12213 if (imm8 < 4) {
12214 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
12215 } else {
12216 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
12219 UShort mask = ~(3 << (imm8 * 2));
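   /* mkV128 expands each bit of its 16-bit argument to a whole byte of the
      128-bit result, so clearing bits 2*imm8 and 2*imm8+1 zeroes exactly
      the destination word lane.  E.g. imm8 == 2 gives a mask of 0xFFCF,
      which clears bytes 4 and 5 (bits 47:32). */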
12220 IRTemp res = newTemp(Ity_V128);
12221 assign( res, binop(Iop_OrV128,
12222 mkexpr(tmp128),
12223 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
12224 return res;
12228 static IRTemp math_PSADBW_128 ( IRTemp dV, IRTemp sV )
12230 IRTemp s1, s0, d1, d0;
12231 s1 = s0 = d1 = d0 = IRTemp_INVALID;
12233 breakupV128to64s( sV, &s1, &s0 );
12234 breakupV128to64s( dV, &d1, &d0 );
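   /* PSADBW: for each 64-bit half, the helper computes the sum of the
      absolute differences of the eight byte pairs (a value in 0..2040)
      and returns it zero-extended to 64 bits; the two halves are then
      glued back together below. */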
12236 IRTemp res = newTemp(Ity_V128);
12237 assign( res,
12238 binop(Iop_64HLtoV128,
12239 mkIRExprCCall(Ity_I64, 0/*regparms*/,
12240 "amd64g_calculate_mmx_psadbw",
12241 &amd64g_calculate_mmx_psadbw,
12242 mkIRExprVec_2( mkexpr(s1), mkexpr(d1))),
12243 mkIRExprCCall(Ity_I64, 0/*regparms*/,
12244 "amd64g_calculate_mmx_psadbw",
12245 &amd64g_calculate_mmx_psadbw,
12246 mkIRExprVec_2( mkexpr(s0), mkexpr(d0)))) );
12247 return res;
12251 static IRTemp math_PSADBW_256 ( IRTemp dV, IRTemp sV )
12253 IRTemp sHi, sLo, dHi, dLo;
12254 sHi = sLo = dHi = dLo = IRTemp_INVALID;
12255 breakupV256toV128s( dV, &dHi, &dLo);
12256 breakupV256toV128s( sV, &sHi, &sLo);
12257 IRTemp res = newTemp(Ity_V256);
12258 assign(res, binop(Iop_V128HLtoV256,
12259 mkexpr(math_PSADBW_128(dHi, sHi)),
12260 mkexpr(math_PSADBW_128(dLo, sLo))));
12261 return res;
12265 static Long dis_MASKMOVDQU ( const VexAbiInfo* vbi, Prefix pfx,
12266 Long delta, Bool isAvx )
12268 IRTemp regD = newTemp(Ity_V128);
12269 IRTemp mask = newTemp(Ity_V128);
12270 IRTemp olddata = newTemp(Ity_V128);
12271 IRTemp newdata = newTemp(Ity_V128);
12272 IRTemp addr = newTemp(Ity_I64);
12273 UChar modrm = getUChar(delta);
12274 UInt rG = gregOfRexRM(pfx,modrm);
12275 UInt rE = eregOfRexRM(pfx,modrm);
12277 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
12278 assign( regD, getXMMReg( rG ));
12280 /* Unfortunately can't do the obvious thing with SarN8x16
12281 here since that can't be re-emitted as SSE2 code - no such
12282 insn. */
12283 assign( mask,
12284 binop(Iop_64HLtoV128,
12285 binop(Iop_SarN8x8,
12286 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ),
12287 mkU8(7) ),
12288 binop(Iop_SarN8x8,
12289 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ),
12290 mkU8(7) ) ));
12291 assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
12292 assign( newdata, binop(Iop_OrV128,
12293 binop(Iop_AndV128,
12294 mkexpr(regD),
12295 mkexpr(mask) ),
12296 binop(Iop_AndV128,
12297 mkexpr(olddata),
12298 unop(Iop_NotV128, mkexpr(mask)))) );
12299 storeLE( mkexpr(addr), mkexpr(newdata) );
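   /* Note this performs a full 16-byte load and store even when some (or
      all) mask bits are clear, whereas real hardware writes only the
      selected bytes; the stored values are nevertheless the same. */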
12301 delta += 1;
12302 DIP("%smaskmovdqu %s,%s\n", isAvx ? "v" : "",
12303 nameXMMReg(rE), nameXMMReg(rG) );
12304 return delta;
12308 static Long dis_MOVMSKPS_128 ( const VexAbiInfo* vbi, Prefix pfx,
12309 Long delta, Bool isAvx )
12311 UChar modrm = getUChar(delta);
12312 UInt rG = gregOfRexRM(pfx,modrm);
12313 UInt rE = eregOfRexRM(pfx,modrm);
12314 IRTemp t0 = newTemp(Ity_I32);
12315 IRTemp t1 = newTemp(Ity_I32);
12316 IRTemp t2 = newTemp(Ity_I32);
12317 IRTemp t3 = newTemp(Ity_I32);
12318 delta += 1;
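   /* For each of the four F32 lanes, shift its sign bit (bit 31) down to
      bit position i and mask it, so lane i contributes bit i of the
      result; e.g. lane 2's sign bit reaches bit 2 via a shift by 29 and
      an AND with 4. */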
12319 assign( t0, binop( Iop_And32,
12320 binop(Iop_Shr32, getXMMRegLane32(rE,0), mkU8(31)),
12321 mkU32(1) ));
12322 assign( t1, binop( Iop_And32,
12323 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(30)),
12324 mkU32(2) ));
12325 assign( t2, binop( Iop_And32,
12326 binop(Iop_Shr32, getXMMRegLane32(rE,2), mkU8(29)),
12327 mkU32(4) ));
12328 assign( t3, binop( Iop_And32,
12329 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(28)),
12330 mkU32(8) ));
12331 putIReg32( rG, binop(Iop_Or32,
12332 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
12333 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
12334 DIP("%smovmskps %s,%s\n", isAvx ? "v" : "",
12335 nameXMMReg(rE), nameIReg32(rG));
12336 return delta;
12340 static Long dis_MOVMSKPS_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
12342 UChar modrm = getUChar(delta);
12343 UInt rG = gregOfRexRM(pfx,modrm);
12344 UInt rE = eregOfRexRM(pfx,modrm);
12345 IRTemp t0 = newTemp(Ity_I32);
12346 IRTemp t1 = newTemp(Ity_I32);
12347 IRTemp t2 = newTemp(Ity_I32);
12348 IRTemp t3 = newTemp(Ity_I32);
12349 IRTemp t4 = newTemp(Ity_I32);
12350 IRTemp t5 = newTemp(Ity_I32);
12351 IRTemp t6 = newTemp(Ity_I32);
12352 IRTemp t7 = newTemp(Ity_I32);
12353 delta += 1;
12354 assign( t0, binop( Iop_And32,
12355 binop(Iop_Shr32, getYMMRegLane32(rE,0), mkU8(31)),
12356 mkU32(1) ));
12357 assign( t1, binop( Iop_And32,
12358 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(30)),
12359 mkU32(2) ));
12360 assign( t2, binop( Iop_And32,
12361 binop(Iop_Shr32, getYMMRegLane32(rE,2), mkU8(29)),
12362 mkU32(4) ));
12363 assign( t3, binop( Iop_And32,
12364 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(28)),
12365 mkU32(8) ));
12366 assign( t4, binop( Iop_And32,
12367 binop(Iop_Shr32, getYMMRegLane32(rE,4), mkU8(27)),
12368 mkU32(16) ));
12369 assign( t5, binop( Iop_And32,
12370 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(26)),
12371 mkU32(32) ));
12372 assign( t6, binop( Iop_And32,
12373 binop(Iop_Shr32, getYMMRegLane32(rE,6), mkU8(25)),
12374 mkU32(64) ));
12375 assign( t7, binop( Iop_And32,
12376 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(24)),
12377 mkU32(128) ));
12378 putIReg32( rG, binop(Iop_Or32,
12379 binop(Iop_Or32,
12380 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
12381 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ),
12382 binop(Iop_Or32,
12383 binop(Iop_Or32, mkexpr(t4), mkexpr(t5)),
12384 binop(Iop_Or32, mkexpr(t6), mkexpr(t7)) ) ) );
12385 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
12386 return delta;
12390 static Long dis_MOVMSKPD_128 ( const VexAbiInfo* vbi, Prefix pfx,
12391 Long delta, Bool isAvx )
12393 UChar modrm = getUChar(delta);
12394 UInt rG = gregOfRexRM(pfx,modrm);
12395 UInt rE = eregOfRexRM(pfx,modrm);
12396 IRTemp t0 = newTemp(Ity_I32);
12397 IRTemp t1 = newTemp(Ity_I32);
12398 delta += 1;
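   /* The sign bit of each F64 lane lives in bit 31 of its upper 32-bit
      half, hence 32-bit lanes 1 and 3 are examined rather than 0 and 2. */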
12399 assign( t0, binop( Iop_And32,
12400 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(31)),
12401 mkU32(1) ));
12402 assign( t1, binop( Iop_And32,
12403 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(30)),
12404 mkU32(2) ));
12405 putIReg32( rG, binop(Iop_Or32, mkexpr(t0), mkexpr(t1) ) );
12406 DIP("%smovmskpd %s,%s\n", isAvx ? "v" : "",
12407 nameXMMReg(rE), nameIReg32(rG));
12408 return delta;
12412 static Long dis_MOVMSKPD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
12414 UChar modrm = getUChar(delta);
12415 UInt rG = gregOfRexRM(pfx,modrm);
12416 UInt rE = eregOfRexRM(pfx,modrm);
12417 IRTemp t0 = newTemp(Ity_I32);
12418 IRTemp t1 = newTemp(Ity_I32);
12419 IRTemp t2 = newTemp(Ity_I32);
12420 IRTemp t3 = newTemp(Ity_I32);
12421 delta += 1;
12422 assign( t0, binop( Iop_And32,
12423 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(31)),
12424 mkU32(1) ));
12425 assign( t1, binop( Iop_And32,
12426 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(30)),
12427 mkU32(2) ));
12428 assign( t2, binop( Iop_And32,
12429 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(29)),
12430 mkU32(4) ));
12431 assign( t3, binop( Iop_And32,
12432 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(28)),
12433 mkU32(8) ));
12434 putIReg32( rG, binop(Iop_Or32,
12435 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
12436 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
12437    DIP("vmovmskpd %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
12438 return delta;
12442 /* Note, this also handles SSE(1) insns. */
12443 __attribute__((noinline))
12444 static
12445 Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
12446 const VexArchInfo* archinfo,
12447 const VexAbiInfo* vbi,
12448 Prefix pfx, Int sz, Long deltaIN,
12449 DisResult* dres )
12451 IRTemp addr = IRTemp_INVALID;
12452 IRTemp t0 = IRTemp_INVALID;
12453 IRTemp t1 = IRTemp_INVALID;
12454 IRTemp t2 = IRTemp_INVALID;
12455 IRTemp t3 = IRTemp_INVALID;
12456 IRTemp t4 = IRTemp_INVALID;
12457 IRTemp t5 = IRTemp_INVALID;
12458 IRTemp t6 = IRTemp_INVALID;
12459 UChar modrm = 0;
12460 Int alen = 0;
12461 HChar dis_buf[50];
12463 *decode_OK = False;
12465 Long delta = deltaIN;
12466 UChar opc = getUChar(delta);
12467 delta++;
12468 switch (opc) {
12470 case 0x10:
12471 if (have66noF2noF3(pfx)
12472 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12473 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
12474 modrm = getUChar(delta);
12475 if (epartIsReg(modrm)) {
12476 putXMMReg( gregOfRexRM(pfx,modrm),
12477 getXMMReg( eregOfRexRM(pfx,modrm) ));
12478 DIP("movupd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12479 nameXMMReg(gregOfRexRM(pfx,modrm)));
12480 delta += 1;
12481 } else {
12482 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12483 putXMMReg( gregOfRexRM(pfx,modrm),
12484 loadLE(Ity_V128, mkexpr(addr)) );
12485 DIP("movupd %s,%s\n", dis_buf,
12486 nameXMMReg(gregOfRexRM(pfx,modrm)));
12487 delta += alen;
12489 goto decode_success;
12491 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
12492 G (lo half xmm). If E is mem, upper half of G is zeroed out.
12493 If E is reg, upper half of G is unchanged. */
12494 if (haveF2no66noF3(pfx)
12495 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) ) {
12496 modrm = getUChar(delta);
12497 if (epartIsReg(modrm)) {
12498 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
12499 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
12500 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12501 nameXMMReg(gregOfRexRM(pfx,modrm)));
12502 delta += 1;
12503 } else {
12504 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12505 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
12506 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
12507 loadLE(Ity_I64, mkexpr(addr)) );
12508 DIP("movsd %s,%s\n", dis_buf,
12509 nameXMMReg(gregOfRexRM(pfx,modrm)));
12510 delta += alen;
12512 goto decode_success;
12514 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
12515 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
12516 if (haveF3no66noF2(pfx)
12517 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12518 modrm = getUChar(delta);
12519 if (epartIsReg(modrm)) {
12520 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
12521 getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 ));
12522 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12523 nameXMMReg(gregOfRexRM(pfx,modrm)));
12524 delta += 1;
12525 } else {
12526 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12527 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
12528 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
12529 loadLE(Ity_I32, mkexpr(addr)) );
12530 DIP("movss %s,%s\n", dis_buf,
12531 nameXMMReg(gregOfRexRM(pfx,modrm)));
12532 delta += alen;
12534 goto decode_success;
12536 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
12537 if (haveNo66noF2noF3(pfx)
12538 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12539 modrm = getUChar(delta);
12540 if (epartIsReg(modrm)) {
12541 putXMMReg( gregOfRexRM(pfx,modrm),
12542 getXMMReg( eregOfRexRM(pfx,modrm) ));
12543 DIP("movups %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12544 nameXMMReg(gregOfRexRM(pfx,modrm)));
12545 delta += 1;
12546 } else {
12547 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12548 putXMMReg( gregOfRexRM(pfx,modrm),
12549 loadLE(Ity_V128, mkexpr(addr)) );
12550 DIP("movups %s,%s\n", dis_buf,
12551 nameXMMReg(gregOfRexRM(pfx,modrm)));
12552 delta += alen;
12554 goto decode_success;
12556 break;
12558 case 0x11:
12559 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
12560 or lo half xmm). */
12561 if (haveF2no66noF3(pfx)
12562 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12563 modrm = getUChar(delta);
12564 if (epartIsReg(modrm)) {
12565 putXMMRegLane64( eregOfRexRM(pfx,modrm), 0,
12566 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
12567 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12568 nameXMMReg(eregOfRexRM(pfx,modrm)));
12569 delta += 1;
12570 } else {
12571 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12572 storeLE( mkexpr(addr),
12573 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
12574 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12575 dis_buf);
12576 delta += alen;
12578 goto decode_success;
12580 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
12581 or lo 1/4 xmm). */
12582 if (haveF3no66noF2(pfx) && sz == 4) {
12583 modrm = getUChar(delta);
12584 if (epartIsReg(modrm)) {
12585 /* fall through, we don't yet have a test case */
12586 } else {
12587 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12588 storeLE( mkexpr(addr),
12589 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
12590 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12591 dis_buf);
12592 delta += alen;
12593 goto decode_success;
12596 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
12597 if (have66noF2noF3(pfx)
12598 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12599 modrm = getUChar(delta);
12600 if (epartIsReg(modrm)) {
12601 putXMMReg( eregOfRexRM(pfx,modrm),
12602 getXMMReg( gregOfRexRM(pfx,modrm) ) );
12603 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12604 nameXMMReg(eregOfRexRM(pfx,modrm)));
12605 delta += 1;
12606 } else {
12607 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12608 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12609 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12610 dis_buf );
12611 delta += alen;
12613 goto decode_success;
12615 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
12616 if (haveNo66noF2noF3(pfx)
12617 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12618 modrm = getUChar(delta);
12619 if (epartIsReg(modrm)) {
12620 /* fall through; awaiting test case */
12621 } else {
12622 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12623 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12624 DIP("movups %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12625 dis_buf );
12626 delta += alen;
12627 goto decode_success;
12630 break;
12632 case 0x12:
12633 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
12634 /* Identical to MOVLPS ? */
12635 if (have66noF2noF3(pfx)
12636 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12637 modrm = getUChar(delta);
12638 if (epartIsReg(modrm)) {
12639 /* fall through; apparently reg-reg is not possible */
12640 } else {
12641 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12642 delta += alen;
12643 putXMMRegLane64( gregOfRexRM(pfx,modrm),
12644 0/*lower lane*/,
12645 loadLE(Ity_I64, mkexpr(addr)) );
12646 DIP("movlpd %s, %s\n",
12647 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
12648 goto decode_success;
12651 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
12652       /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
12653 if (haveNo66noF2noF3(pfx)
12654 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12655 modrm = getUChar(delta);
12656 if (epartIsReg(modrm)) {
12657 delta += 1;
12658 putXMMRegLane64( gregOfRexRM(pfx,modrm),
12659 0/*lower lane*/,
12660 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ));
12661 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12662 nameXMMReg(gregOfRexRM(pfx,modrm)));
12663 } else {
12664 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12665 delta += alen;
12666 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/,
12667 loadLE(Ity_I64, mkexpr(addr)) );
12668 DIP("movlps %s, %s\n",
12669 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
12671 goto decode_success;
12673 break;
12675 case 0x13:
12676 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
12677 if (haveNo66noF2noF3(pfx)
12678 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12679 modrm = getUChar(delta);
12680 if (!epartIsReg(modrm)) {
12681 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12682 delta += alen;
12683 storeLE( mkexpr(addr),
12684 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12685 0/*lower lane*/ ) );
12686 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12687 dis_buf);
12688 goto decode_success;
12690 /* else fall through */
12692 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
12693 /* Identical to MOVLPS ? */
12694 if (have66noF2noF3(pfx)
12695 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12696 modrm = getUChar(delta);
12697 if (!epartIsReg(modrm)) {
12698 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12699 delta += alen;
12700 storeLE( mkexpr(addr),
12701 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12702 0/*lower lane*/ ) );
12703 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12704 dis_buf);
12705 goto decode_success;
12707 /* else fall through */
12709 break;
12711 case 0x14:
12712 case 0x15:
12713 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
12714 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
12715 /* These just appear to be special cases of SHUFPS */
12716 if (haveNo66noF2noF3(pfx) && sz == 4) {
12717 Bool hi = toBool(opc == 0x15);
12718 IRTemp sV = newTemp(Ity_V128);
12719 IRTemp dV = newTemp(Ity_V128);
12720 modrm = getUChar(delta);
12721 UInt rG = gregOfRexRM(pfx,modrm);
12722 assign( dV, getXMMReg(rG) );
12723 if (epartIsReg(modrm)) {
12724 UInt rE = eregOfRexRM(pfx,modrm);
12725 assign( sV, getXMMReg(rE) );
12726 delta += 1;
12727 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
12728 nameXMMReg(rE), nameXMMReg(rG));
12729 } else {
12730 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12731 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12732 delta += alen;
12733 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
12734 dis_buf, nameXMMReg(rG));
12736 IRTemp res = math_UNPCKxPS_128( sV, dV, hi );
12737 putXMMReg( rG, mkexpr(res) );
12738 goto decode_success;
12740 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
12741 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
12742       /* These just appear to be special cases of SHUFPD */
12743 if (have66noF2noF3(pfx)
12744 && sz == 2 /* could be 8 if rex also present */) {
12745 Bool hi = toBool(opc == 0x15);
12746 IRTemp sV = newTemp(Ity_V128);
12747 IRTemp dV = newTemp(Ity_V128);
12748 modrm = getUChar(delta);
12749 UInt rG = gregOfRexRM(pfx,modrm);
12750 assign( dV, getXMMReg(rG) );
12751 if (epartIsReg(modrm)) {
12752 UInt rE = eregOfRexRM(pfx,modrm);
12753 assign( sV, getXMMReg(rE) );
12754 delta += 1;
12755             DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
12756 nameXMMReg(rE), nameXMMReg(rG));
12757 } else {
12758 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12759 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12760 delta += alen;
12761             DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
12762 dis_buf, nameXMMReg(rG));
12764 IRTemp res = math_UNPCKxPD_128( sV, dV, hi );
12765 putXMMReg( rG, mkexpr(res) );
12766 goto decode_success;
12768 break;
12770 case 0x16:
12771 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
12772       /* This seems identical to MOVHPS.  This instruction encoding is
12773 completely crazy. */
12774 if (have66noF2noF3(pfx)
12775 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12776 modrm = getUChar(delta);
12777 if (epartIsReg(modrm)) {
12778 /* fall through; apparently reg-reg is not possible */
12779 } else {
12780 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12781 delta += alen;
12782 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12783 loadLE(Ity_I64, mkexpr(addr)) );
12784 DIP("movhpd %s,%s\n", dis_buf,
12785 nameXMMReg( gregOfRexRM(pfx,modrm) ));
12786 goto decode_success;
12789 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
12790 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
12791 if (haveNo66noF2noF3(pfx)
12792 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12793 modrm = getUChar(delta);
12794 if (epartIsReg(modrm)) {
12795 delta += 1;
12796 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12797 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) );
12798 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12799 nameXMMReg(gregOfRexRM(pfx,modrm)));
12800 } else {
12801 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12802 delta += alen;
12803 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12804 loadLE(Ity_I64, mkexpr(addr)) );
12805 DIP("movhps %s,%s\n", dis_buf,
12806 nameXMMReg( gregOfRexRM(pfx,modrm) ));
12808 goto decode_success;
12810 break;
12812 case 0x17:
12813 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
12814 if (haveNo66noF2noF3(pfx)
12815 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12816 modrm = getUChar(delta);
12817 if (!epartIsReg(modrm)) {
12818 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12819 delta += alen;
12820 storeLE( mkexpr(addr),
12821 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12822 1/*upper lane*/ ) );
12823 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12824 dis_buf);
12825 goto decode_success;
12827 /* else fall through */
12829 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
12830 /* Again, this seems identical to MOVHPS. */
12831 if (have66noF2noF3(pfx)
12832 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12833 modrm = getUChar(delta);
12834 if (!epartIsReg(modrm)) {
12835 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12836 delta += alen;
12837 storeLE( mkexpr(addr),
12838 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12839 1/*upper lane*/ ) );
12840 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12841 dis_buf);
12842 goto decode_success;
12844 /* else fall through */
12846 break;
12848 case 0x18:
12849 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
12850 /* 0F 18 /1 = PREFETCH0 -- with various different hints */
12851 /* 0F 18 /2 = PREFETCH1 */
12852 /* 0F 18 /3 = PREFETCH2 */
12853 if (haveNo66noF2noF3(pfx)
12854 && !epartIsReg(getUChar(delta))
12855 && gregLO3ofRM(getUChar(delta)) >= 0
12856 && gregLO3ofRM(getUChar(delta)) <= 3) {
12857 const HChar* hintstr = "??";
12859 modrm = getUChar(delta);
12860 vassert(!epartIsReg(modrm));
12862 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12863 delta += alen;
12865 switch (gregLO3ofRM(modrm)) {
12866 case 0: hintstr = "nta"; break;
12867 case 1: hintstr = "t0"; break;
12868 case 2: hintstr = "t1"; break;
12869 case 3: hintstr = "t2"; break;
12870 default: vassert(0);
12873 DIP("prefetch%s %s\n", hintstr, dis_buf);
12874 goto decode_success;
12876 break;
12878 case 0x28:
12879 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
12880 if (have66noF2noF3(pfx)
12881 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12882 modrm = getUChar(delta);
12883 if (epartIsReg(modrm)) {
12884 putXMMReg( gregOfRexRM(pfx,modrm),
12885 getXMMReg( eregOfRexRM(pfx,modrm) ));
12886 DIP("movapd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12887 nameXMMReg(gregOfRexRM(pfx,modrm)));
12888 delta += 1;
12889 } else {
12890 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12891 gen_SEGV_if_not_16_aligned( addr );
12892 putXMMReg( gregOfRexRM(pfx,modrm),
12893 loadLE(Ity_V128, mkexpr(addr)) );
12894 DIP("movapd %s,%s\n", dis_buf,
12895 nameXMMReg(gregOfRexRM(pfx,modrm)));
12896 delta += alen;
12898 goto decode_success;
12900 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
12901 if (haveNo66noF2noF3(pfx)
12902 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12903 modrm = getUChar(delta);
12904 if (epartIsReg(modrm)) {
12905 putXMMReg( gregOfRexRM(pfx,modrm),
12906 getXMMReg( eregOfRexRM(pfx,modrm) ));
12907 DIP("movaps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12908 nameXMMReg(gregOfRexRM(pfx,modrm)));
12909 delta += 1;
12910 } else {
12911 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12912 gen_SEGV_if_not_16_aligned( addr );
12913 putXMMReg( gregOfRexRM(pfx,modrm),
12914 loadLE(Ity_V128, mkexpr(addr)) );
12915 DIP("movaps %s,%s\n", dis_buf,
12916 nameXMMReg(gregOfRexRM(pfx,modrm)));
12917 delta += alen;
12919 goto decode_success;
12921 break;
12923 case 0x29:
12924 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
12925 if (haveNo66noF2noF3(pfx)
12926 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12927 modrm = getUChar(delta);
12928 if (epartIsReg(modrm)) {
12929 putXMMReg( eregOfRexRM(pfx,modrm),
12930 getXMMReg( gregOfRexRM(pfx,modrm) ));
12931 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12932 nameXMMReg(eregOfRexRM(pfx,modrm)));
12933 delta += 1;
12934 } else {
12935 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12936 gen_SEGV_if_not_16_aligned( addr );
12937 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12938 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12939 dis_buf );
12940 delta += alen;
12942 goto decode_success;
12944 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
12945 if (have66noF2noF3(pfx)
12946 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12947 modrm = getUChar(delta);
12948 if (epartIsReg(modrm)) {
12949 putXMMReg( eregOfRexRM(pfx,modrm),
12950 getXMMReg( gregOfRexRM(pfx,modrm) ) );
12951 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12952 nameXMMReg(eregOfRexRM(pfx,modrm)));
12953 delta += 1;
12954 } else {
12955 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12956 gen_SEGV_if_not_16_aligned( addr );
12957 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12958 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12959 dis_buf );
12960 delta += alen;
12962 goto decode_success;
12964 break;
12966 case 0x2A:
12967 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
12968 half xmm */
12969 if (haveNo66noF2noF3(pfx) && sz == 4) {
12970 IRTemp arg64 = newTemp(Ity_I64);
12971 IRTemp rmode = newTemp(Ity_I32);
12973 modrm = getUChar(delta);
12974 if (epartIsReg(modrm)) {
12975 /* Only switch to MMX mode if the source is a MMX register.
12976 See comments on CVTPI2PD for details. Fixes #357059. */
12977 do_MMX_preamble();
12978 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
12979 delta += 1;
12980 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
12981 nameXMMReg(gregOfRexRM(pfx,modrm)));
12982 } else {
12983 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12984 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
12985 delta += alen;
12986 DIP("cvtpi2ps %s,%s\n", dis_buf,
12987 nameXMMReg(gregOfRexRM(pfx,modrm)) );
12990 assign( rmode, get_sse_roundingmode() );
12992 putXMMRegLane32F(
12993 gregOfRexRM(pfx,modrm), 0,
12994 binop(Iop_F64toF32,
12995 mkexpr(rmode),
12996 unop(Iop_I32StoF64,
12997 unop(Iop_64to32, mkexpr(arg64)) )) );
12999 putXMMRegLane32F(
13000 gregOfRexRM(pfx,modrm), 1,
13001 binop(Iop_F64toF32,
13002 mkexpr(rmode),
13003 unop(Iop_I32StoF64,
13004 unop(Iop_64HIto32, mkexpr(arg64)) )) );
13006 goto decode_success;
13008 /* F3 0F 2A = CVTSI2SS
13009 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm
13010 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */
13011 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
13012 IRTemp rmode = newTemp(Ity_I32);
13013 assign( rmode, get_sse_roundingmode() );
13014 modrm = getUChar(delta);
13015 if (sz == 4) {
13016 IRTemp arg32 = newTemp(Ity_I32);
13017 if (epartIsReg(modrm)) {
13018 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
13019 delta += 1;
13020 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
13021 nameXMMReg(gregOfRexRM(pfx,modrm)));
13022 } else {
13023 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13024 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
13025 delta += alen;
13026 DIP("cvtsi2ss %s,%s\n", dis_buf,
13027 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13029 putXMMRegLane32F(
13030 gregOfRexRM(pfx,modrm), 0,
13031 binop(Iop_F64toF32,
13032 mkexpr(rmode),
13033 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
13034 } else {
13035 /* sz == 8 */
13036 IRTemp arg64 = newTemp(Ity_I64);
13037 if (epartIsReg(modrm)) {
13038 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
13039 delta += 1;
13040 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
13041 nameXMMReg(gregOfRexRM(pfx,modrm)));
13042 } else {
13043 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13044 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
13045 delta += alen;
13046 DIP("cvtsi2ssq %s,%s\n", dis_buf,
13047 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13049 putXMMRegLane32F(
13050 gregOfRexRM(pfx,modrm), 0,
13051 binop(Iop_F64toF32,
13052 mkexpr(rmode),
13053 binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) );
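               /* Note: this goes via F64 (I64 -> F64, then F64 -> F32), so a
                  value that needs rounding in both steps may in principle
                  double-round relative to a direct I64 -> F32 conversion. */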
13055 goto decode_success;
13057 /* F2 0F 2A = CVTSI2SD
13058 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm
13059          when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm */
13061 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
13062 modrm = getUChar(delta);
13063 if (sz == 4) {
13064 IRTemp arg32 = newTemp(Ity_I32);
13065 if (epartIsReg(modrm)) {
13066 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
13067 delta += 1;
13068 DIP("cvtsi2sdl %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
13069 nameXMMReg(gregOfRexRM(pfx,modrm)));
13070 } else {
13071 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13072 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
13073 delta += alen;
13074 DIP("cvtsi2sdl %s,%s\n", dis_buf,
13075 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13077 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
13078 unop(Iop_I32StoF64, mkexpr(arg32))
13080 } else {
13081 /* sz == 8 */
13082 IRTemp arg64 = newTemp(Ity_I64);
13083 if (epartIsReg(modrm)) {
13084 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
13085 delta += 1;
13086 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
13087 nameXMMReg(gregOfRexRM(pfx,modrm)));
13088 } else {
13089 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13090 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
13091 delta += alen;
13092 DIP("cvtsi2sdq %s,%s\n", dis_buf,
13093 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13095 putXMMRegLane64F(
13096 gregOfRexRM(pfx,modrm),
13098 binop( Iop_I64StoF64,
13099 get_sse_roundingmode(),
13100 mkexpr(arg64)
13104 goto decode_success;
13106 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
13107 xmm(G) */
13108 if (have66noF2noF3(pfx) && sz == 2) {
13109 IRTemp arg64 = newTemp(Ity_I64);
13111 modrm = getUChar(delta);
13112 if (epartIsReg(modrm)) {
13113 /* Only switch to MMX mode if the source is a MMX register.
13114 This is inconsistent with all other instructions which
13115 convert between XMM and (M64 or MMX), which always switch
13116 to MMX mode even if 64-bit operand is M64 and not MMX. At
13117 least, that's what the Intel docs seem to me to say.
13118 Fixes #210264. */
13119 do_MMX_preamble();
13120 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
13121 delta += 1;
13122 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
13123 nameXMMReg(gregOfRexRM(pfx,modrm)));
13124 } else {
13125 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13126 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
13127 delta += alen;
13128 DIP("cvtpi2pd %s,%s\n", dis_buf,
13129 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13132 putXMMRegLane64F(
13133 gregOfRexRM(pfx,modrm), 0,
13134 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
13137 putXMMRegLane64F(
13138 gregOfRexRM(pfx,modrm), 1,
13139 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
13142 goto decode_success;
13144 break;
13146 case 0x2B:
13147 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
13148 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
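      /* The non-temporal hint only affects cache behaviour, which is not
         visible at the IR level, so it is simply dropped. */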
13149 if ( (haveNo66noF2noF3(pfx) && sz == 4)
13150 || (have66noF2noF3(pfx) && sz == 2) ) {
13151 modrm = getUChar(delta);
13152 if (!epartIsReg(modrm)) {
13153 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13154 gen_SEGV_if_not_16_aligned( addr );
13155 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
13156 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
13157 dis_buf,
13158 nameXMMReg(gregOfRexRM(pfx,modrm)));
13159 delta += alen;
13160 goto decode_success;
13162 /* else fall through */
13164 break;
13166 case 0x2C:
13167 case 0x2D:
13168 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
13169 I32 in mmx, according to prevailing SSE rounding mode */
13170 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
13171 I32 in mmx, rounding towards zero */
13172 if (haveNo66noF2noF3(pfx) && sz == 4) {
13173 IRTemp dst64 = newTemp(Ity_I64);
13174 IRTemp rmode = newTemp(Ity_I32);
13175 IRTemp f32lo = newTemp(Ity_F32);
13176 IRTemp f32hi = newTemp(Ity_F32);
13177 Bool r2zero = toBool(opc == 0x2C);
13179 do_MMX_preamble();
13180 modrm = getUChar(delta);
13182 if (epartIsReg(modrm)) {
13183 delta += 1;
13184 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
13185 assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1));
13186 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
13187 nameXMMReg(eregOfRexRM(pfx,modrm)),
13188 nameMMXReg(gregLO3ofRM(modrm)));
13189 } else {
13190 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13191 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
13192 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64,
13193 mkexpr(addr),
13194 mkU64(4) )));
13195 delta += alen;
13196 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
13197 dis_buf,
13198 nameMMXReg(gregLO3ofRM(modrm)));
13201 if (r2zero) {
13202 assign(rmode, mkU32((UInt)Irrm_ZERO) );
13203 } else {
13204 assign( rmode, get_sse_roundingmode() );
13207 assign(
13208 dst64,
13209 binop( Iop_32HLto64,
13210 binop( Iop_F64toI32S,
13211 mkexpr(rmode),
13212 unop( Iop_F32toF64, mkexpr(f32hi) ) ),
13213 binop( Iop_F64toI32S,
13214 mkexpr(rmode),
13215 unop( Iop_F32toF64, mkexpr(f32lo) ) )
13219 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
13220 goto decode_success;
13222 /* F3 0F 2D = CVTSS2SI
13223 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
13224 according to prevailing SSE rounding mode
13225 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
13226             according to prevailing SSE rounding mode */
13228 /* F3 0F 2C = CVTTSS2SI
13229 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
13230 truncating towards zero
13231 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
13232             truncating towards zero */
13234 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
13235 delta = dis_CVTxSS2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
13236 goto decode_success;
13238 /* F2 0F 2D = CVTSD2SI
13239 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
13240 according to prevailing SSE rounding mode
13241 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
13242             according to prevailing SSE rounding mode */
13244 /* F2 0F 2C = CVTTSD2SI
13245 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
13246 truncating towards zero
13247 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
13248             truncating towards zero */
13250 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
13251 delta = dis_CVTxSD2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
13252 goto decode_success;
13254 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
13255 I32 in mmx, according to prevailing SSE rounding mode */
13256 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
13257 I32 in mmx, rounding towards zero */
13258 if (have66noF2noF3(pfx) && sz == 2) {
13259 IRTemp dst64 = newTemp(Ity_I64);
13260 IRTemp rmode = newTemp(Ity_I32);
13261 IRTemp f64lo = newTemp(Ity_F64);
13262 IRTemp f64hi = newTemp(Ity_F64);
13263 Bool r2zero = toBool(opc == 0x2C);
13265 do_MMX_preamble();
13266 modrm = getUChar(delta);
13268 if (epartIsReg(modrm)) {
13269 delta += 1;
13270 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
13271 assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1));
13272 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
13273 nameXMMReg(eregOfRexRM(pfx,modrm)),
13274 nameMMXReg(gregLO3ofRM(modrm)));
13275 } else {
13276 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13277 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
13278 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64,
13279 mkexpr(addr),
13280 mkU64(8) )));
13281 delta += alen;
13282 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
13283 dis_buf,
13284 nameMMXReg(gregLO3ofRM(modrm)));
13287 if (r2zero) {
13288 assign(rmode, mkU32((UInt)Irrm_ZERO) );
13289 } else {
13290 assign( rmode, get_sse_roundingmode() );
13293 assign(
13294 dst64,
13295 binop( Iop_32HLto64,
13296 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
13297 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
13301 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
13302 goto decode_success;
13304 break;
13306 case 0x2E:
13307 case 0x2F:
13308 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
13309 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
13310 if (have66noF2noF3(pfx) && sz == 2) {
13311 delta = dis_COMISD( vbi, pfx, delta, False/*!isAvx*/, opc );
13312 goto decode_success;
13314 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
13315 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
13316 if (haveNo66noF2noF3(pfx) && sz == 4) {
13317 delta = dis_COMISS( vbi, pfx, delta, False/*!isAvx*/, opc );
13318 goto decode_success;
13320 break;
13322 case 0x50:
13323 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
13324 to 4 lowest bits of ireg(G) */
13325 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
13326 && epartIsReg(getUChar(delta))) {
13327 /* sz == 8 is a kludge to handle insns with REX.W redundantly
13328 set to 1, which has been known to happen:
13330 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d
13332 20071106: Intel docs say that REX.W isn't redundant: when
13333 present, a 64-bit register is written; when not present, only
13334 the 32-bit half is written. However, testing on a Core2
13335 machine suggests the entire 64 bit register is written
13336 irrespective of the status of REX.W. That could be because
13337 of the default rule that says "if the lower half of a 32-bit
13338 register is written, the upper half is zeroed". By using
13339 putIReg32 here we inadvertently produce the same behaviour as
13340 the Core2, for the same reason -- putIReg32 implements said
13341 rule.
13343 AMD docs give no indication that REX.W is even valid for this
13344 insn. */
13345 delta = dis_MOVMSKPS_128( vbi, pfx, delta, False/*!isAvx*/ );
13346 goto decode_success;
13348 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
13349 2 lowest bits of ireg(G) */
13350 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
13351 /* sz == 8 is a kludge to handle insns with REX.W redundantly
13352 set to 1, which has been known to happen:
13353 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d
13354 20071106: see further comments on MOVMSKPS implementation above.
13356 delta = dis_MOVMSKPD_128( vbi, pfx, delta, False/*!isAvx*/ );
13357 goto decode_success;
13359 break;
13361 case 0x51:
13362 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
13363 if (haveF3no66noF2(pfx) && sz == 4) {
13364 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
13365 "sqrtss", Iop_Sqrt32F0x4 );
13366 goto decode_success;
13368 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
13369 if (haveNo66noF2noF3(pfx) && sz == 4) {
13370 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13371 "sqrtps", Iop_Sqrt32Fx4 );
13372 goto decode_success;
13374 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
13375 if (haveF2no66noF3(pfx) && sz == 4) {
13376 delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta,
13377 "sqrtsd", Iop_Sqrt64F0x2 );
13378 goto decode_success;
13380 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
13381 if (have66noF2noF3(pfx) && sz == 2) {
13382 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13383 "sqrtpd", Iop_Sqrt64Fx2 );
13384 goto decode_success;
13386 break;
13388 case 0x52:
13389 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
13390 if (haveF3no66noF2(pfx) && sz == 4) {
13391 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
13392 "rsqrtss", Iop_RSqrtEst32F0x4 );
13393 goto decode_success;
13395 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
13396 if (haveNo66noF2noF3(pfx) && sz == 4) {
13397 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13398 "rsqrtps", Iop_RSqrtEst32Fx4 );
13399 goto decode_success;
13401 break;
13403 case 0x53:
13404 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
13405 if (haveF3no66noF2(pfx) && sz == 4) {
13406 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
13407 "rcpss", Iop_RecipEst32F0x4 );
13408 goto decode_success;
13410 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
13411 if (haveNo66noF2noF3(pfx) && sz == 4) {
13412 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13413 "rcpps", Iop_RecipEst32Fx4 );
13414 goto decode_success;
13416 break;
13418 case 0x54:
13419 /* 0F 54 = ANDPS -- G = G and E */
13420 if (haveNo66noF2noF3(pfx) && sz == 4) {
13421 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andps", Iop_AndV128 );
13422 goto decode_success;
13424 /* 66 0F 54 = ANDPD -- G = G and E */
13425 if (have66noF2noF3(pfx) && sz == 2) {
13426 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andpd", Iop_AndV128 );
13427 goto decode_success;
13429 break;
13431 case 0x55:
13432 /* 0F 55 = ANDNPS -- G = (not G) and E */
13433 if (haveNo66noF2noF3(pfx) && sz == 4) {
13434 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnps",
13435 Iop_AndV128 );
13436 goto decode_success;
13438 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
13439 if (have66noF2noF3(pfx) && sz == 2) {
13440 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnpd",
13441 Iop_AndV128 );
13442 goto decode_success;
13444 break;
13446 case 0x56:
13447 /* 0F 56 = ORPS -- G = G or E */
13448 if (haveNo66noF2noF3(pfx) && sz == 4) {
13449 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orps", Iop_OrV128 );
13450 goto decode_success;
13452 /* 66 0F 56 = ORPD -- G = G or E */
13453 if (have66noF2noF3(pfx) && sz == 2) {
13454 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orpd", Iop_OrV128 );
13455 goto decode_success;
13457 break;
13459 case 0x57:
13460 /* 66 0F 57 = XORPD -- G = G xor E */
13461 if (have66noF2noF3(pfx) && sz == 2) {
13462 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorpd", Iop_XorV128 );
13463 goto decode_success;
13465 /* 0F 57 = XORPS -- G = G xor E */
13466 if (haveNo66noF2noF3(pfx) && sz == 4) {
13467 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorps", Iop_XorV128 );
13468 goto decode_success;
13470 break;
13472 case 0x58:
13473 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
13474 if (haveNo66noF2noF3(pfx) && sz == 4) {
13475 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addps", Iop_Add32Fx4 );
13476 goto decode_success;
13478 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
13479 if (haveF3no66noF2(pfx) && sz == 4) {
13480 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "addss", Iop_Add32F0x4 );
13481 goto decode_success;
13483 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
13484 if (haveF2no66noF3(pfx)
13485 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13486 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "addsd", Iop_Add64F0x2 );
13487 goto decode_success;
13489 /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
13490 if (have66noF2noF3(pfx)
13491 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13492 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addpd", Iop_Add64Fx2 );
13493 goto decode_success;
13495 break;
13497 case 0x59:
13498 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
13499 if (haveF2no66noF3(pfx)
13500 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13501 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "mulsd", Iop_Mul64F0x2 );
13502 goto decode_success;
13504 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
13505 if (haveF3no66noF2(pfx) && sz == 4) {
13506 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "mulss", Iop_Mul32F0x4 );
13507 goto decode_success;
13509 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
13510 if (haveNo66noF2noF3(pfx) && sz == 4) {
13511 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulps", Iop_Mul32Fx4 );
13512 goto decode_success;
13514 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
13515 if (have66noF2noF3(pfx)
13516 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13517 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulpd", Iop_Mul64Fx2 );
13518 goto decode_success;
13520 break;
13522 case 0x5A:
13523 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
13524 F64 in xmm(G). */
13525 if (haveNo66noF2noF3(pfx)
13526 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13527 delta = dis_CVTPS2PD_128( vbi, pfx, delta, False/*!isAvx*/ );
13528 goto decode_success;
13530 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
13531 low half xmm(G) */
13532 if (haveF3no66noF2(pfx) && sz == 4) {
13533 IRTemp f32lo = newTemp(Ity_F32);
13535 modrm = getUChar(delta);
13536 if (epartIsReg(modrm)) {
13537 delta += 1;
13538 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
13539 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13540 nameXMMReg(gregOfRexRM(pfx,modrm)));
13541 } else {
13542 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13543 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
13544 delta += alen;
13545 DIP("cvtss2sd %s,%s\n", dis_buf,
13546 nameXMMReg(gregOfRexRM(pfx,modrm)));
13549 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
13550 unop( Iop_F32toF64, mkexpr(f32lo) ) );
13552 goto decode_success;
13554 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
13555 low 1/4 xmm(G), according to prevailing SSE rounding mode */
13556 if (haveF2no66noF3(pfx) && sz == 4) {
13557 IRTemp rmode = newTemp(Ity_I32);
13558 IRTemp f64lo = newTemp(Ity_F64);
13560 modrm = getUChar(delta);
13561 if (epartIsReg(modrm)) {
13562 delta += 1;
13563 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
13564 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13565 nameXMMReg(gregOfRexRM(pfx,modrm)));
13566 } else {
13567 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13568 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
13569 delta += alen;
13570 DIP("cvtsd2ss %s,%s\n", dis_buf,
13571 nameXMMReg(gregOfRexRM(pfx,modrm)));
13574 assign( rmode, get_sse_roundingmode() );
13575 putXMMRegLane32F(
13576 gregOfRexRM(pfx,modrm), 0,
13577 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
13580 goto decode_success;
13582 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
13583 lo half xmm(G), rounding according to prevailing SSE rounding
13584 mode, and zero upper half */
13585 /* Note, this is practically identical to CVTPD2DQ. It would be
13586 nice to merge them together. */
13587 if (have66noF2noF3(pfx) && sz == 2) {
13588 delta = dis_CVTPD2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
13589 goto decode_success;
13591 break;
13593 case 0x5B:
13594 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
13595 xmm(G), rounding towards zero */
13596 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
13597 xmm(G), as per the prevailing rounding mode */
13598 if ( (have66noF2noF3(pfx) && sz == 2)
13599 || (haveF3no66noF2(pfx) && sz == 4) ) {
13600 Bool r2zero = toBool(sz == 4); // FIXME -- unreliable (???)
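/* Within the guard above, sz == 4 can only have come from the F3
   (CVTTPS2DQ) case, so r2zero does select truncation here. */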
13601 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, False/*!isAvx*/, r2zero );
13602 goto decode_success;
13604 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
13605 xmm(G) */
13606 if (haveNo66noF2noF3(pfx) && sz == 4) {
13607 delta = dis_CVTDQ2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
13608 goto decode_success;
13610 break;
13612 case 0x5C:
13613 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
13614 if (haveF3no66noF2(pfx) && sz == 4) {
13615 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "subss", Iop_Sub32F0x4 );
13616 goto decode_success;
13618 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
13619 if (haveF2no66noF3(pfx)
13620 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13621 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "subsd", Iop_Sub64F0x2 );
13622 goto decode_success;
13624 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
13625 if (haveNo66noF2noF3(pfx) && sz == 4) {
13626 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subps", Iop_Sub32Fx4 );
13627 goto decode_success;
13629 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
13630 if (have66noF2noF3(pfx) && sz == 2) {
13631 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subpd", Iop_Sub64Fx2 );
13632 goto decode_success;
13634 break;
13636 case 0x5D:
13637 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
13638 if (haveNo66noF2noF3(pfx) && sz == 4) {
13639 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minps", Iop_Min32Fx4 );
13640 goto decode_success;
13642 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
13643 if (haveF3no66noF2(pfx) && sz == 4) {
13644 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "minss", Iop_Min32F0x4 );
13645 goto decode_success;
13647 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
13648 if (haveF2no66noF3(pfx)
13649 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13650 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "minsd", Iop_Min64F0x2 );
13651 goto decode_success;
13653 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
13654 if (have66noF2noF3(pfx) && sz == 2) {
13655 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minpd", Iop_Min64Fx2 );
13656 goto decode_success;
13658 break;
13660 case 0x5E:
13661 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
13662 if (haveF2no66noF3(pfx) && sz == 4) {
13663 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "divsd", Iop_Div64F0x2 );
13664 goto decode_success;
13666 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
13667 if (haveNo66noF2noF3(pfx) && sz == 4) {
13668 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divps", Iop_Div32Fx4 );
13669 goto decode_success;
13671 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
13672 if (haveF3no66noF2(pfx) && sz == 4) {
13673 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "divss", Iop_Div32F0x4 );
13674 goto decode_success;
13676 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
13677 if (have66noF2noF3(pfx) && sz == 2) {
13678 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divpd", Iop_Div64Fx2 );
13679 goto decode_success;
13681 break;
13683 case 0x5F:
13684 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
13685 if (haveNo66noF2noF3(pfx) && sz == 4) {
13686 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxps", Iop_Max32Fx4 );
13687 goto decode_success;
13689 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
13690 if (haveF3no66noF2(pfx) && sz == 4) {
13691 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "maxss", Iop_Max32F0x4 );
13692 goto decode_success;
13694 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
13695 if (haveF2no66noF3(pfx)
13696 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13697 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "maxsd", Iop_Max64F0x2 );
13698 goto decode_success;
13700 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
13701 if (have66noF2noF3(pfx) && sz == 2) {
13702 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxpd", Iop_Max64Fx2 );
13703 goto decode_success;
13705 break;
13707 case 0x60:
13708 /* 66 0F 60 = PUNPCKLBW */
13709 if (have66noF2noF3(pfx) && sz == 2) {
13710 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13711 "punpcklbw",
13712 Iop_InterleaveLO8x16, True );
13713 goto decode_success;
13715 break;
13717 case 0x61:
13718 /* 66 0F 61 = PUNPCKLWD */
13719 if (have66noF2noF3(pfx) && sz == 2) {
13720 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13721 "punpcklwd",
13722 Iop_InterleaveLO16x8, True );
13723 goto decode_success;
13725 break;
13727 case 0x62:
13728 /* 66 0F 62 = PUNPCKLDQ */
13729 if (have66noF2noF3(pfx) && sz == 2) {
13730 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13731 "punpckldq",
13732 Iop_InterleaveLO32x4, True );
13733 goto decode_success;
13735 break;
13737 case 0x63:
13738 /* 66 0F 63 = PACKSSWB */
13739 if (have66noF2noF3(pfx) && sz == 2) {
13740 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13741 "packsswb",
13742 Iop_QNarrowBin16Sto8Sx16, True );
13743 goto decode_success;
13745 break;
13747 case 0x64:
13748 /* 66 0F 64 = PCMPGTB */
13749 if (have66noF2noF3(pfx) && sz == 2) {
13750 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13751 "pcmpgtb", Iop_CmpGT8Sx16, False );
13752 goto decode_success;
13754 break;
13756 case 0x65:
13757 /* 66 0F 65 = PCMPGTW */
13758 if (have66noF2noF3(pfx) && sz == 2) {
13759 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13760 "pcmpgtw", Iop_CmpGT16Sx8, False );
13761 goto decode_success;
13763 break;
13765 case 0x66:
13766 /* 66 0F 66 = PCMPGTD */
13767 if (have66noF2noF3(pfx) && sz == 2) {
13768 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13769 "pcmpgtd", Iop_CmpGT32Sx4, False );
13770 goto decode_success;
13772 break;
13774 case 0x67:
13775 /* 66 0F 67 = PACKUSWB */
13776 if (have66noF2noF3(pfx) && sz == 2) {
13777 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13778 "packuswb",
13779 Iop_QNarrowBin16Sto8Ux16, True );
13780 goto decode_success;
13782 break;
13784 case 0x68:
13785 /* 66 0F 68 = PUNPCKHBW */
13786 if (have66noF2noF3(pfx) && sz == 2) {
13787 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13788 "punpckhbw",
13789 Iop_InterleaveHI8x16, True );
13790 goto decode_success;
13792 break;
13794 case 0x69:
13795 /* 66 0F 69 = PUNPCKHWD */
13796 if (have66noF2noF3(pfx) && sz == 2) {
13797 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13798 "punpckhwd",
13799 Iop_InterleaveHI16x8, True );
13800 goto decode_success;
13802 break;
13804 case 0x6A:
13805 /* 66 0F 6A = PUNPCKHDQ */
13806 if (have66noF2noF3(pfx) && sz == 2) {
13807 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13808 "punpckhdq",
13809 Iop_InterleaveHI32x4, True );
13810 goto decode_success;
13812 break;
13814 case 0x6B:
13815 /* 66 0F 6B = PACKSSDW */
13816 if (have66noF2noF3(pfx) && sz == 2) {
13817 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13818 "packssdw",
13819 Iop_QNarrowBin32Sto16Sx8, True );
13820 goto decode_success;
13822 break;
13824 case 0x6C:
13825 /* 66 0F 6C = PUNPCKLQDQ */
13826 if (have66noF2noF3(pfx) && sz == 2) {
13827 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13828 "punpcklqdq",
13829 Iop_InterleaveLO64x2, True );
13830 goto decode_success;
13832 break;
13834 case 0x6D:
13835 /* 66 0F 6D = PUNPCKHQDQ */
13836 if (have66noF2noF3(pfx) && sz == 2) {
13837 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13838 "punpckhqdq",
13839 Iop_InterleaveHI64x2, True );
13840 goto decode_success;
13842 break;
13844 case 0x6E:
13845 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4,
13846 zeroing high 3/4 of xmm. */
13847 /* or from ireg64/m64 to xmm lo 1/2,
13848 zeroing high 1/2 of xmm. */
13849 if (have66noF2noF3(pfx)) {
13850 vassert(sz == 2 || sz == 8);
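/* With the 66 prefix, sz arrives here as 2 even though the transfer is
   32 bits wide, hence the adjustment to 4 below; sz == 8 (REX.W)
   selects the 64-bit form. */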
13851 if (sz == 2) sz = 4;
13852 modrm = getUChar(delta);
13853 if (epartIsReg(modrm)) {
13854 delta += 1;
13855 if (sz == 4) {
13856 putXMMReg(
13857 gregOfRexRM(pfx,modrm),
13858 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
13860 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
13861 nameXMMReg(gregOfRexRM(pfx,modrm)));
13862 } else {
13863 putXMMReg(
13864 gregOfRexRM(pfx,modrm),
13865 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
13867 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
13868 nameXMMReg(gregOfRexRM(pfx,modrm)));
13870 } else {
13871 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
13872 delta += alen;
13873 putXMMReg(
13874 gregOfRexRM(pfx,modrm),
13875 sz == 4
13876 ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
13877 : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) )
13879 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf,
13880 nameXMMReg(gregOfRexRM(pfx,modrm)));
13882 goto decode_success;
13884 break;
13886 case 0x6F:
13887 if (have66noF2noF3(pfx)
13888 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13889 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
13890 modrm = getUChar(delta);
13891 if (epartIsReg(modrm)) {
13892 putXMMReg( gregOfRexRM(pfx,modrm),
13893 getXMMReg( eregOfRexRM(pfx,modrm) ));
13894 DIP("movdqa %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13895 nameXMMReg(gregOfRexRM(pfx,modrm)));
13896 delta += 1;
13897 } else {
13898 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13899 gen_SEGV_if_not_16_aligned( addr );
13900 putXMMReg( gregOfRexRM(pfx,modrm),
13901 loadLE(Ity_V128, mkexpr(addr)) );
13902 DIP("movdqa %s,%s\n", dis_buf,
13903 nameXMMReg(gregOfRexRM(pfx,modrm)));
13904 delta += alen;
13906 goto decode_success;
13908 if (haveF3no66noF2(pfx) && sz == 4) {
13909 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
13910 modrm = getUChar(delta);
13911 if (epartIsReg(modrm)) {
13912 putXMMReg( gregOfRexRM(pfx,modrm),
13913 getXMMReg( eregOfRexRM(pfx,modrm) ));
13914 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13915 nameXMMReg(gregOfRexRM(pfx,modrm)));
13916 delta += 1;
13917 } else {
13918 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13919 putXMMReg( gregOfRexRM(pfx,modrm),
13920 loadLE(Ity_V128, mkexpr(addr)) );
13921 DIP("movdqu %s,%s\n", dis_buf,
13922 nameXMMReg(gregOfRexRM(pfx,modrm)));
13923 delta += alen;
13925 goto decode_success;
13927 break;
13929 case 0x70:
13930 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
13931 if (have66noF2noF3(pfx) && sz == 2) {
13932 delta = dis_PSHUFD_32x4( vbi, pfx, delta, False/*!writesYmm*/);
13933 goto decode_success;
13935 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13936 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
13937 if (haveNo66noF2noF3(pfx) && sz == 4) {
13938 Int order;
13939 IRTemp sV, dV, s3, s2, s1, s0;
13940 s3 = s2 = s1 = s0 = IRTemp_INVALID;
13941 sV = newTemp(Ity_I64);
13942 dV = newTemp(Ity_I64);
13943 do_MMX_preamble();
13944 modrm = getUChar(delta);
13945 if (epartIsReg(modrm)) {
13946 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13947 order = (Int)getUChar(delta+1);
13948 delta += 1+1;
13949 DIP("pshufw $%d,%s,%s\n", order,
13950 nameMMXReg(eregLO3ofRM(modrm)),
13951 nameMMXReg(gregLO3ofRM(modrm)));
13952 } else {
13953 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
13954 1/*extra byte after amode*/ );
13955 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13956 order = (Int)getUChar(delta+alen);
13957 delta += 1+alen;
13958 DIP("pshufw $%d,%s,%s\n", order,
13959 dis_buf,
13960 nameMMXReg(gregLO3ofRM(modrm)));
13962 breakup64to16s( sV, &s3, &s2, &s1, &s0 );
13963 # define SEL(n) \
13964 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
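/* For example, order == 0xE4 (binary 11 10 01 00) selects s3,s2,s1,s0
   and so leaves the value unchanged. */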
13965 assign(dV,
13966 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
13967 SEL((order>>2)&3), SEL((order>>0)&3) )
13969 putMMXReg(gregLO3ofRM(modrm), mkexpr(dV));
13970 # undef SEL
13971 goto decode_success;
13973 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
13974 mem) to G(xmm), and copy upper half */
13975 if (haveF2no66noF3(pfx) && sz == 4) {
13976 delta = dis_PSHUFxW_128( vbi, pfx, delta,
13977 False/*!isAvx*/, False/*!xIsH*/ );
13978 goto decode_success;
13980 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
13981 mem) to G(xmm), and copy lower half */
13982 if (haveF3no66noF2(pfx) && sz == 4) {
13983 delta = dis_PSHUFxW_128( vbi, pfx, delta,
13984 False/*!isAvx*/, True/*xIsH*/ );
13985 goto decode_success;
13987 break;
13989 case 0x71:
13990 /* 66 0F 71 /2 ib = PSRLW by immediate */
13991 if (have66noF2noF3(pfx) && sz == 2
13992 && epartIsReg(getUChar(delta))
13993 && gregLO3ofRM(getUChar(delta)) == 2) {
13994 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlw", Iop_ShrN16x8 );
13995 goto decode_success;
13997 /* 66 0F 71 /4 ib = PSRAW by immediate */
13998 if (have66noF2noF3(pfx) && sz == 2
13999 && epartIsReg(getUChar(delta))
14000 && gregLO3ofRM(getUChar(delta)) == 4) {
14001 delta = dis_SSE_shiftE_imm( pfx, delta, "psraw", Iop_SarN16x8 );
14002 goto decode_success;
14004 /* 66 0F 71 /6 ib = PSLLW by immediate */
14005 if (have66noF2noF3(pfx) && sz == 2
14006 && epartIsReg(getUChar(delta))
14007 && gregLO3ofRM(getUChar(delta)) == 6) {
14008 delta = dis_SSE_shiftE_imm( pfx, delta, "psllw", Iop_ShlN16x8 );
14009 goto decode_success;
14011 break;
14013 case 0x72:
14014 /* 66 0F 72 /2 ib = PSRLD by immediate */
14015 if (have66noF2noF3(pfx) && sz == 2
14016 && epartIsReg(getUChar(delta))
14017 && gregLO3ofRM(getUChar(delta)) == 2) {
14018 delta = dis_SSE_shiftE_imm( pfx, delta, "psrld", Iop_ShrN32x4 );
14019 goto decode_success;
14021 /* 66 0F 72 /4 ib = PSRAD by immediate */
14022 if (have66noF2noF3(pfx) && sz == 2
14023 && epartIsReg(getUChar(delta))
14024 && gregLO3ofRM(getUChar(delta)) == 4) {
14025 delta = dis_SSE_shiftE_imm( pfx, delta, "psrad", Iop_SarN32x4 );
14026 goto decode_success;
14028 /* 66 0F 72 /6 ib = PSLLD by immediate */
14029 if (have66noF2noF3(pfx) && sz == 2
14030 && epartIsReg(getUChar(delta))
14031 && gregLO3ofRM(getUChar(delta)) == 6) {
14032 delta = dis_SSE_shiftE_imm( pfx, delta, "pslld", Iop_ShlN32x4 );
14033 goto decode_success;
14035 break;
14037 case 0x73:
14038 /* 66 0F 73 /3 ib = PSRLDQ by immediate */
14039 /* note, if mem case ever filled in, 1 byte after amode */
14040 if (have66noF2noF3(pfx) && sz == 2
14041 && epartIsReg(getUChar(delta))
14042 && gregLO3ofRM(getUChar(delta)) == 3) {
14043 Int imm = (Int)getUChar(delta+1);
14044 Int reg = eregOfRexRM(pfx,getUChar(delta));
14045 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
14046 delta += 2;
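/* The immediate is a byte count; counts of 16 or more clear the register. */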
14047 IRTemp sV = newTemp(Ity_V128);
14048 assign( sV, getXMMReg(reg) );
14049 putXMMReg(reg, mkexpr(math_PSRLDQ( sV, imm )));
14050 goto decode_success;
14052 /* 66 0F 73 /7 ib = PSLLDQ by immediate */
14053 /* note, if mem case ever filled in, 1 byte after amode */
14054 if (have66noF2noF3(pfx) && sz == 2
14055 && epartIsReg(getUChar(delta))
14056 && gregLO3ofRM(getUChar(delta)) == 7) {
14057 Int imm = (Int)getUChar(delta+1);
14058 Int reg = eregOfRexRM(pfx,getUChar(delta));
14059 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
14060 vassert(imm >= 0 && imm <= 255);
14061 delta += 2;
14062 IRTemp sV = newTemp(Ity_V128);
14063 assign( sV, getXMMReg(reg) );
14064 putXMMReg(reg, mkexpr(math_PSLLDQ( sV, imm )));
14065 goto decode_success;
14067 /* 66 0F 73 /2 ib = PSRLQ by immediate */
14068 if (have66noF2noF3(pfx) && sz == 2
14069 && epartIsReg(getUChar(delta))
14070 && gregLO3ofRM(getUChar(delta)) == 2) {
14071 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlq", Iop_ShrN64x2 );
14072 goto decode_success;
14074 /* 66 0F 73 /6 ib = PSLLQ by immediate */
14075 if (have66noF2noF3(pfx) && sz == 2
14076 && epartIsReg(getUChar(delta))
14077 && gregLO3ofRM(getUChar(delta)) == 6) {
14078 delta = dis_SSE_shiftE_imm( pfx, delta, "psllq", Iop_ShlN64x2 );
14079 goto decode_success;
14081 break;
14083 case 0x74:
14084 /* 66 0F 74 = PCMPEQB */
14085 if (have66noF2noF3(pfx) && sz == 2) {
14086 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14087 "pcmpeqb", Iop_CmpEQ8x16, False );
14088 goto decode_success;
14090 break;
14092 case 0x75:
14093 /* 66 0F 75 = PCMPEQW */
14094 if (have66noF2noF3(pfx) && sz == 2) {
14095 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14096 "pcmpeqw", Iop_CmpEQ16x8, False );
14097 goto decode_success;
14099 break;
14101 case 0x76:
14102 /* 66 0F 76 = PCMPEQD */
14103 if (have66noF2noF3(pfx) && sz == 2) {
14104 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14105 "pcmpeqd", Iop_CmpEQ32x4, False );
14106 goto decode_success;
14108 break;
14110 case 0x7E:
14111 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
14112 G (lo half xmm). Upper half of G is zeroed out. */
14113 if (haveF3no66noF2(pfx)
14114 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
14115 modrm = getUChar(delta);
14116 if (epartIsReg(modrm)) {
14117 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
14118 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
14119 /* zero bits 127:64 */
14120 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) );
14121 DIP("movq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
14122 nameXMMReg(gregOfRexRM(pfx,modrm)));
14123 delta += 1;
14124 } else {
14125 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14126 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
14127 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
14128 loadLE(Ity_I64, mkexpr(addr)) );
14129 DIP("movq %s,%s\n", dis_buf,
14130 nameXMMReg(gregOfRexRM(pfx,modrm)));
14131 delta += alen;
14133 goto decode_success;
14135 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */
14136 /* or from xmm low 1/2 to ireg64 or m64. */
14137 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
14138 if (sz == 2) sz = 4;
14139 modrm = getUChar(delta);
14140 if (epartIsReg(modrm)) {
14141 delta += 1;
14142 if (sz == 4) {
14143 putIReg32( eregOfRexRM(pfx,modrm),
14144 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
14145 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14146 nameIReg32(eregOfRexRM(pfx,modrm)));
14147 } else {
14148 putIReg64( eregOfRexRM(pfx,modrm),
14149 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
14150 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14151 nameIReg64(eregOfRexRM(pfx,modrm)));
14153 } else {
14154 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
14155 delta += alen;
14156 storeLE( mkexpr(addr),
14157 sz == 4
14158 ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0)
14159 : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) );
14160 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q',
14161 nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
14163 goto decode_success;
14165 break;
14167 case 0x7F:
14168 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
14169 if (haveF3no66noF2(pfx) && sz == 4) {
14170 modrm = getUChar(delta);
14171 if (epartIsReg(modrm)) {
14172 goto decode_failure; /* awaiting test case */
14173 delta += 1;
14174 putXMMReg( eregOfRexRM(pfx,modrm),
14175 getXMMReg(gregOfRexRM(pfx,modrm)) );
14176 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14177 nameXMMReg(eregOfRexRM(pfx,modrm)));
14178 } else {
14179 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
14180 delta += alen;
14181 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
14182 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
14184 goto decode_success;
14186 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
14187 if (have66noF2noF3(pfx) && sz == 2) {
14188 modrm = getUChar(delta);
14189 if (epartIsReg(modrm)) {
14190 delta += 1;
14191 putXMMReg( eregOfRexRM(pfx,modrm),
14192 getXMMReg(gregOfRexRM(pfx,modrm)) );
14193 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14194 nameXMMReg(eregOfRexRM(pfx,modrm)));
14195 } else {
14196 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
14197 gen_SEGV_if_not_16_aligned( addr );
14198 delta += alen;
14199 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
14200 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
14202 goto decode_success;
14204 break;
14206 case 0xAE:
14207 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
14208 if (haveNo66noF2noF3(pfx)
14209 && epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
14210 && sz == 4) {
14211 delta += 1;
14212 /* Insert a memory fence. It's sometimes important that these
14213 are carried through to the generated code. */
14214 stmt( IRStmt_MBE(Imbe_Fence) );
14215 DIP("sfence\n");
14216 goto decode_success;
14218 /* mindless duplication follows .. */
14219 /* 0F AE /5 = LFENCE -- flush pending operations to memory */
14220 /* 0F AE /6 = MFENCE -- flush pending operations to memory */
14221 if (haveNo66noF2noF3(pfx)
14222 && epartIsReg(getUChar(delta))
14223 && (gregLO3ofRM(getUChar(delta)) == 5
14224 || gregLO3ofRM(getUChar(delta)) == 6)
14225 && sz == 4) {
14226 delta += 1;
14227 /* Insert a memory fence. It's sometimes important that these
14228 are carried through to the generated code. */
14229 stmt( IRStmt_MBE(Imbe_Fence) );
14230 DIP("%sfence\n", gregLO3ofRM(getUChar(delta-1))==5 ? "l" : "m");
14231 goto decode_success;
14234 /* 0F AE /7 = CLFLUSH -- flush cache line */
14235 if (haveNo66noF2noF3(pfx)
14236 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
14237 && sz == 4) {
14239 /* This is something of a hack. We need to know the size of
14240 the cache line containing addr. Since we don't (easily),
14241 assume 256 on the basis that no real cache would have a
14242 line that big. It's safe to invalidate more stuff than we
14243 need, just inefficient. */
14244 ULong lineszB = 256ULL;
14246 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14247 delta += alen;
14249 /* Round addr down to the start of the containing block. */
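/* For example, with lineszB == 256, an addr of 0x12345 is masked
   down to 0x12300. */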
14250 stmt( IRStmt_Put(
14251 OFFB_CMSTART,
14252 binop( Iop_And64,
14253 mkexpr(addr),
14254 mkU64( ~(lineszB-1) ))) );
14256 stmt( IRStmt_Put(OFFB_CMLEN, mkU64(lineszB) ) );
14258 jmp_lit(dres, Ijk_InvalICache, (Addr64)(guest_RIP_bbstart+delta));
14260 DIP("clflush %s\n", dis_buf);
14261 goto decode_success;
14264 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
14265 if (haveNo66noF2noF3(pfx)
14266 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
14267 && sz == 4) {
14268 delta = dis_STMXCSR(vbi, pfx, delta, False/*!isAvx*/);
14269 goto decode_success;
14271 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
14272 if (haveNo66noF2noF3(pfx)
14273 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
14274 && sz == 4) {
14275 delta = dis_LDMXCSR(vbi, pfx, delta, False/*!isAvx*/);
14276 goto decode_success;
14278 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
14279 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14280 && !epartIsReg(getUChar(delta))
14281 && gregOfRexRM(pfx,getUChar(delta)) == 0) {
14282 delta = dis_FXSAVE(vbi, pfx, delta, sz);
14283 goto decode_success;
14285 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
14286 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14287 && !epartIsReg(getUChar(delta))
14288 && gregOfRexRM(pfx,getUChar(delta)) == 1) {
14289 delta = dis_FXRSTOR(vbi, pfx, delta, sz);
14290 goto decode_success;
14292 /* 0F AE /4 = XSAVE mem -- write x87, SSE, AVX state to memory */
14293 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14294 && !epartIsReg(getUChar(delta))
14295 && gregOfRexRM(pfx,getUChar(delta)) == 4
14296 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
14297 delta = dis_XSAVE(vbi, pfx, delta, sz);
14298 goto decode_success;
14300 /* 0F AE /5 = XRSTOR mem -- read x87, SSE, AVX state from memory */
14301 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14302 && !epartIsReg(getUChar(delta))
14303 && gregOfRexRM(pfx,getUChar(delta)) == 5
14304 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
14305 delta = dis_XRSTOR(vbi, pfx, delta, sz);
14306 goto decode_success;
14308 break;
14310 case 0xC2:
14311 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
14312 if (haveNo66noF2noF3(pfx) && sz == 4) {
14313 Long delta0 = delta;
14314 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpps", True, 4 );
14315 if (delta > delta0) goto decode_success;
14317 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
14318 if (haveF3no66noF2(pfx) && sz == 4) {
14319 Long delta0 = delta;
14320 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpss", False, 4 );
14321 if (delta > delta0) goto decode_success;
14323 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
14324 if (haveF2no66noF3(pfx) && sz == 4) {
14325 Long delta0 = delta;
14326 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpsd", False, 8 );
14327 if (delta > delta0) goto decode_success;
14329 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
14330 if (have66noF2noF3(pfx) && sz == 2) {
14331 Long delta0 = delta;
14332 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmppd", True, 8 );
14333 if (delta > delta0) goto decode_success;
14335 break;
14337 case 0xC3:
14338 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
14339 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
14340 modrm = getUChar(delta);
14341 if (!epartIsReg(modrm)) {
14342 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14343 storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) );
14344 DIP("movnti %s,%s\n", dis_buf,
14345 nameIRegG(sz, pfx, modrm));
14346 delta += alen;
14347 goto decode_success;
14349 /* else fall through */
14351 break;
14353 case 0xC4:
14354 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14355 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
14356 put it into the specified lane of mmx(G). */
14357 if (haveNo66noF2noF3(pfx)
14358 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
14359 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
14360 mmx reg. t4 is the new lane value. t5 is the original
14361 mmx value. t6 is the new mmx value. */
14362 Int lane;
14363 t4 = newTemp(Ity_I16);
14364 t5 = newTemp(Ity_I64);
14365 t6 = newTemp(Ity_I64);
14366 modrm = getUChar(delta);
14367 do_MMX_preamble();
14369 assign(t5, getMMXReg(gregLO3ofRM(modrm)));
14370 breakup64to16s( t5, &t3, &t2, &t1, &t0 );
14372 if (epartIsReg(modrm)) {
14373 assign(t4, getIReg16(eregOfRexRM(pfx,modrm)));
14374 delta += 1+1;
14375 lane = getUChar(delta-1);
14376 DIP("pinsrw $%d,%s,%s\n", lane,
14377 nameIReg16(eregOfRexRM(pfx,modrm)),
14378 nameMMXReg(gregLO3ofRM(modrm)));
14379 } else {
14380 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
14381 delta += 1+alen;
14382 lane = getUChar(delta-1);
14383 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
14384 DIP("pinsrw $%d,%s,%s\n", lane,
14385 dis_buf,
14386 nameMMXReg(gregLO3ofRM(modrm)));
14389 switch (lane & 3) {
14390 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
14391 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
14392 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
14393 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
14394 default: vassert(0);
14396 putMMXReg(gregLO3ofRM(modrm), mkexpr(t6));
14397 goto decode_success;
14399 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
14400 put it into the specified lane of xmm(G). */
14401 if (have66noF2noF3(pfx)
14402 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
14403 Int lane;
14404 t4 = newTemp(Ity_I16);
14405 modrm = getUChar(delta);
14406 UInt rG = gregOfRexRM(pfx,modrm);
14407 if (epartIsReg(modrm)) {
14408 UInt rE = eregOfRexRM(pfx,modrm);
14409 assign(t4, getIReg16(rE));
14410 delta += 1+1;
14411 lane = getUChar(delta-1);
14412 DIP("pinsrw $%d,%s,%s\n",
14413 lane, nameIReg16(rE), nameXMMReg(rG));
14414 } else {
14415 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
14416 1/*byte after the amode*/ );
14417 delta += 1+alen;
14418 lane = getUChar(delta-1);
14419 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
14420 DIP("pinsrw $%d,%s,%s\n",
14421 lane, dis_buf, nameXMMReg(rG));
14423 IRTemp src_vec = newTemp(Ity_V128);
14424 assign(src_vec, getXMMReg(rG));
14425 IRTemp res_vec = math_PINSRW_128( src_vec, t4, lane & 7);
14426 putXMMReg(rG, mkexpr(res_vec));
14427 goto decode_success;
14429 break;
14431 case 0xC5:
14432 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14433 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
14434 zero-extend of it in ireg(G). */
14435 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
14436 modrm = getUChar(delta);
14437 if (epartIsReg(modrm)) {
14438 IRTemp sV = newTemp(Ity_I64);
14439 t5 = newTemp(Ity_I16);
14440 do_MMX_preamble();
14441 assign(sV, getMMXReg(eregLO3ofRM(modrm)));
14442 breakup64to16s( sV, &t3, &t2, &t1, &t0 );
14443 switch (getUChar(delta+1) & 3) {
14444 case 0: assign(t5, mkexpr(t0)); break;
14445 case 1: assign(t5, mkexpr(t1)); break;
14446 case 2: assign(t5, mkexpr(t2)); break;
14447 case 3: assign(t5, mkexpr(t3)); break;
14448 default: vassert(0);
14450 if (sz == 8)
14451 putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5)));
14452 else
14453 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5)));
14454 DIP("pextrw $%d,%s,%s\n",
14455 (Int)getUChar(delta+1),
14456 nameMMXReg(eregLO3ofRM(modrm)),
14457 sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm))
14458 : nameIReg32(gregOfRexRM(pfx,modrm))
14460 delta += 2;
14461 goto decode_success;
14463 /* else fall through */
14464 /* note, for anyone filling in the mem case: this insn has one
14465 byte after the amode and therefore you must pass 1 as the
14466 last arg to disAMode */
14468 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
14469 zero-extend of it in ireg(G). */
14470 if (have66noF2noF3(pfx)
14471 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
14472 Long delta0 = delta;
14473 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
14474 False/*!isAvx*/ );
14475 if (delta > delta0) goto decode_success;
14476 /* else fall through -- decoding has failed */
14478 break;
14480 case 0xC6:
14481 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
14482 if (haveNo66noF2noF3(pfx) && sz == 4) {
14483 Int imm8 = 0;
14484 IRTemp sV = newTemp(Ity_V128);
14485 IRTemp dV = newTemp(Ity_V128);
14486 modrm = getUChar(delta);
14487 UInt rG = gregOfRexRM(pfx,modrm);
14488 assign( dV, getXMMReg(rG) );
14489 if (epartIsReg(modrm)) {
14490 UInt rE = eregOfRexRM(pfx,modrm);
14491 assign( sV, getXMMReg(rE) );
14492 imm8 = (Int)getUChar(delta+1);
14493 delta += 1+1;
14494 DIP("shufps $%d,%s,%s\n", imm8, nameXMMReg(rE), nameXMMReg(rG));
14495 } else {
14496 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
14497 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14498 imm8 = (Int)getUChar(delta+alen);
14499 delta += 1+alen;
14500 DIP("shufps $%d,%s,%s\n", imm8, dis_buf, nameXMMReg(rG));
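/* imm8 bits [3:0] pick the two low result lanes from dV (the
   destination), bits [7:4] the two high lanes from sV. */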
14502 IRTemp res = math_SHUFPS_128( sV, dV, imm8 );
14503 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
14504 goto decode_success;
14506 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
14507 if (have66noF2noF3(pfx) && sz == 2) {
14508 Int select;
14509 IRTemp sV = newTemp(Ity_V128);
14510 IRTemp dV = newTemp(Ity_V128);
14512 modrm = getUChar(delta);
14513 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
14515 if (epartIsReg(modrm)) {
14516 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
14517 select = (Int)getUChar(delta+1);
14518 delta += 1+1;
14519 DIP("shufpd $%d,%s,%s\n", select,
14520 nameXMMReg(eregOfRexRM(pfx,modrm)),
14521 nameXMMReg(gregOfRexRM(pfx,modrm)));
14522 } else {
14523 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
14524 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14525 select = getUChar(delta+alen);
14526 delta += 1+alen;
14527 DIP("shufpd $%d,%s,%s\n", select,
14528 dis_buf,
14529 nameXMMReg(gregOfRexRM(pfx,modrm)));
14532 IRTemp res = math_SHUFPD_128( sV, dV, select );
14533 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
14534 goto decode_success;
14536 break;
14538 case 0xD1:
14539 /* 66 0F D1 = PSRLW by E */
14540 if (have66noF2noF3(pfx) && sz == 2) {
14541 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlw", Iop_ShrN16x8 );
14542 goto decode_success;
14544 break;
14546 case 0xD2:
14547 /* 66 0F D2 = PSRLD by E */
14548 if (have66noF2noF3(pfx) && sz == 2) {
14549 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrld", Iop_ShrN32x4 );
14550 goto decode_success;
14552 break;
14554 case 0xD3:
14555 /* 66 0F D3 = PSRLQ by E */
14556 if (have66noF2noF3(pfx) && sz == 2) {
14557 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlq", Iop_ShrN64x2 );
14558 goto decode_success;
14560 break;
14562 case 0xD4:
14563 /* 66 0F D4 = PADDQ */
14564 if (have66noF2noF3(pfx) && sz == 2) {
14565 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14566 "paddq", Iop_Add64x2, False );
14567 goto decode_success;
14569 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
14570 /* 0F D4 = PADDQ -- add 64x1 */
14571 if (haveNo66noF2noF3(pfx) && sz == 4) {
14572 do_MMX_preamble();
14573 delta = dis_MMXop_regmem_to_reg (
14574 vbi, pfx, delta, opc, "paddq", False );
14575 goto decode_success;
14577 break;
14579 case 0xD5:
14580 /* 66 0F D5 = PMULLW -- 16x8 multiply */
14581 if (have66noF2noF3(pfx) && sz == 2) {
14582 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14583 "pmullw", Iop_Mul16x8, False );
14584 goto decode_success;
14586 break;
14588 case 0xD6:
14589 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
14590 hi half). */
14591 if (haveF3no66noF2(pfx) && sz == 4) {
14592 modrm = getUChar(delta);
14593 if (epartIsReg(modrm)) {
14594 do_MMX_preamble();
14595 putXMMReg( gregOfRexRM(pfx,modrm),
14596 unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) );
14597 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14598 nameXMMReg(gregOfRexRM(pfx,modrm)));
14599 delta += 1;
14600 goto decode_success;
14602 /* apparently no mem case for this insn */
14604 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
14605 or lo half xmm). */
14606 if (have66noF2noF3(pfx)
14607 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
14608 modrm = getUChar(delta);
14609 if (epartIsReg(modrm)) {
14610 /* fall through, awaiting test case */
14611 /* dst: lo half copied, hi half zeroed */
14612 } else {
14613 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14614 storeLE( mkexpr(addr),
14615 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
14616 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf );
14617 delta += alen;
14618 goto decode_success;
14621 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
14622 if (haveF2no66noF3(pfx) && sz == 4) {
14623 modrm = getUChar(delta);
14624 if (epartIsReg(modrm)) {
14625 do_MMX_preamble();
14626 putMMXReg( gregLO3ofRM(modrm),
14627 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
14628 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
14629 nameMMXReg(gregLO3ofRM(modrm)));
14630 delta += 1;
14631 goto decode_success;
14633 /* apparently no mem case for this insn */
14635 break;
14637 case 0xD7:
14638 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16
14639 lanes in xmm(E), turn them into a byte, and put
14640 zero-extend of it in ireg(G). Doing this directly is just
14641 too cumbersome, so we give up and call a helper. */
14642 if (have66noF2noF3(pfx)
14643 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
14644 && epartIsReg(getUChar(delta))) { /* no memory case, it seems */
14645 delta = dis_PMOVMSKB_128( vbi, pfx, delta, False/*!isAvx*/ );
14646 goto decode_success;
14648 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14649 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
14650 mmx(E), turn them into a byte, and put zero-extend of it in
14651 ireg(G). */
14652 if (haveNo66noF2noF3(pfx)
14653 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
14654 modrm = getUChar(delta);
14655 if (epartIsReg(modrm)) {
14656 do_MMX_preamble();
14657 t0 = newTemp(Ity_I64);
14658 t1 = newTemp(Ity_I32);
14659 assign(t0, getMMXReg(eregLO3ofRM(modrm)));
14660 assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
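/* Iop_GetMSBs8x8 gathers the top bit of each of the 8 bytes into one
   byte, which is then zero-extended to 32 bits. */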
14661 putIReg32(gregOfRexRM(pfx,modrm), mkexpr(t1));
14662 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14663 nameIReg32(gregOfRexRM(pfx,modrm)));
14664 delta += 1;
14665 goto decode_success;
14667 /* else fall through */
14669 break;
14671 case 0xD8:
14672 /* 66 0F D8 = PSUBUSB */
14673 if (have66noF2noF3(pfx) && sz == 2) {
14674 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14675 "psubusb", Iop_QSub8Ux16, False );
14676 goto decode_success;
14678 break;
14680 case 0xD9:
14681 /* 66 0F D9 = PSUBUSW */
14682 if (have66noF2noF3(pfx) && sz == 2) {
14683 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14684 "psubusw", Iop_QSub16Ux8, False );
14685 goto decode_success;
14687 break;
14689 case 0xDA:
14690 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14691 /* 0F DA = PMINUB -- 8x8 unsigned min */
14692 if (haveNo66noF2noF3(pfx) && sz == 4) {
14693 do_MMX_preamble();
14694 delta = dis_MMXop_regmem_to_reg (
14695 vbi, pfx, delta, opc, "pminub", False );
14696 goto decode_success;
14698 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
14699 if (have66noF2noF3(pfx) && sz == 2) {
14700 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14701 "pminub", Iop_Min8Ux16, False );
14702 goto decode_success;
14704 break;
14706 case 0xDB:
14707 /* 66 0F DB = PAND */
14708 if (have66noF2noF3(pfx) && sz == 2) {
14709 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pand", Iop_AndV128 );
14710 goto decode_success;
14712 break;
14714 case 0xDC:
14715 /* 66 0F DC = PADDUSB */
14716 if (have66noF2noF3(pfx) && sz == 2) {
14717 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14718 "paddusb", Iop_QAdd8Ux16, False );
14719 goto decode_success;
14721 break;
14723 case 0xDD:
14724 /* 66 0F DD = PADDUSW */
14725 if (have66noF2noF3(pfx) && sz == 2) {
14726 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14727 "paddusw", Iop_QAdd16Ux8, False );
14728 goto decode_success;
14730 break;
14732 case 0xDE:
14733 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14734 /* 0F DE = PMAXUB -- 8x8 unsigned max */
14735 if (haveNo66noF2noF3(pfx) && sz == 4) {
14736 do_MMX_preamble();
14737 delta = dis_MMXop_regmem_to_reg (
14738 vbi, pfx, delta, opc, "pmaxub", False );
14739 goto decode_success;
14741 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
14742 if (have66noF2noF3(pfx) && sz == 2) {
14743 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14744 "pmaxub", Iop_Max8Ux16, False );
14745 goto decode_success;
14747 break;
14749 case 0xDF:
14750 /* 66 0F DF = PANDN */
14751 if (have66noF2noF3(pfx) && sz == 2) {
14752 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "pandn", Iop_AndV128 );
14753 goto decode_success;
14755 break;
14757 case 0xE0:
14758 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14759 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
14760 if (haveNo66noF2noF3(pfx) && sz == 4) {
14761 do_MMX_preamble();
14762 delta = dis_MMXop_regmem_to_reg (
14763 vbi, pfx, delta, opc, "pavgb", False );
14764 goto decode_success;
14766 /* 66 0F E0 = PAVGB */
14767 if (have66noF2noF3(pfx) && sz == 2) {
14768 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14769 "pavgb", Iop_Avg8Ux16, False );
14770 goto decode_success;
14772 break;
14774 case 0xE1:
14775 /* 66 0F E1 = PSRAW by E */
14776 if (have66noF2noF3(pfx) && sz == 2) {
14777 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psraw", Iop_SarN16x8 );
14778 goto decode_success;
14780 break;
14782 case 0xE2:
14783 /* 66 0F E2 = PSRAD by E */
14784 if (have66noF2noF3(pfx) && sz == 2) {
14785 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrad", Iop_SarN32x4 );
14786 goto decode_success;
14788 break;
14790 case 0xE3:
14791 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14792 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
14793 if (haveNo66noF2noF3(pfx) && sz == 4) {
14794 do_MMX_preamble();
14795 delta = dis_MMXop_regmem_to_reg (
14796 vbi, pfx, delta, opc, "pavgw", False );
14797 goto decode_success;
14799 /* 66 0F E3 = PAVGW */
14800 if (have66noF2noF3(pfx) && sz == 2) {
14801 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14802 "pavgw", Iop_Avg16Ux8, False );
14803 goto decode_success;
14805 break;
14807 case 0xE4:
14808 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14809 /* 0F E4 = PMULHUW -- 16x4 hi-half of unsigned widening multiply */
14810 if (haveNo66noF2noF3(pfx) && sz == 4) {
14811 do_MMX_preamble();
14812 delta = dis_MMXop_regmem_to_reg (
14813 vbi, pfx, delta, opc, "pmuluh", False );
14814 goto decode_success;
14816 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
14817 if (have66noF2noF3(pfx) && sz == 2) {
14818 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14819 "pmulhuw", Iop_MulHi16Ux8, False );
14820 goto decode_success;
14822 break;
14824 case 0xE5:
14825 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
14826 if (have66noF2noF3(pfx) && sz == 2) {
14827 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14828 "pmulhw", Iop_MulHi16Sx8, False );
14829 goto decode_success;
14831 break;
14833 case 0xE6:
14834 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
14835 lo half xmm(G), and zero upper half, rounding towards zero */
14836 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
14837 lo half xmm(G), according to prevailing rounding mode, and zero
14838 upper half */
14839 if ( (haveF2no66noF3(pfx) && sz == 4)
14840 || (have66noF2noF3(pfx) && sz == 2) ) {
14841 delta = dis_CVTxPD2DQ_128( vbi, pfx, delta, False/*!isAvx*/,
14842 toBool(sz == 2)/*r2zero*/);
14843 goto decode_success;
14845 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
14846 F64 in xmm(G) */
14847 if (haveF3no66noF2(pfx) && sz == 4) {
14848 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, False/*!isAvx*/);
14849 goto decode_success;
14851 break;
14853 case 0xE7:
14854 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14855 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
14856 Intel manual does not say anything about the usual business of
14857 the FP reg tags getting trashed whenever an MMX insn happens.
14858 So we just leave them alone.
14860 if (haveNo66noF2noF3(pfx) && sz == 4) {
14861 modrm = getUChar(delta);
14862 if (!epartIsReg(modrm)) {
14863 /* do_MMX_preamble(); Intel docs don't specify this */
14864 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14865 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
14866 DIP("movntq %s,%s\n", dis_buf,
14867 nameMMXReg(gregLO3ofRM(modrm)));
14868 delta += alen;
14869 goto decode_success;
14871 /* else fall through */
14873 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
14874 if (have66noF2noF3(pfx) && sz == 2) {
14875 modrm = getUChar(delta);
14876 if (!epartIsReg(modrm)) {
14877 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14878 gen_SEGV_if_not_16_aligned( addr );
14879 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
14880 DIP("movntdq %s,%s\n", dis_buf,
14881 nameXMMReg(gregOfRexRM(pfx,modrm)));
14882 delta += alen;
14883 goto decode_success;
14885 /* else fall through */
14887 break;
14889 case 0xE8:
14890 /* 66 0F E8 = PSUBSB */
14891 if (have66noF2noF3(pfx) && sz == 2) {
14892 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14893 "psubsb", Iop_QSub8Sx16, False );
14894 goto decode_success;
14896 break;
14898 case 0xE9:
14899 /* 66 0F E9 = PSUBSW */
14900 if (have66noF2noF3(pfx) && sz == 2) {
14901 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14902 "psubsw", Iop_QSub16Sx8, False );
14903 goto decode_success;
14905 break;
14907 case 0xEA:
14908 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14909 /* 0F EA = PMINSW -- 16x4 signed min */
14910 if (haveNo66noF2noF3(pfx) && sz == 4) {
14911 do_MMX_preamble();
14912 delta = dis_MMXop_regmem_to_reg (
14913 vbi, pfx, delta, opc, "pminsw", False );
14914 goto decode_success;
14916 /* 66 0F EA = PMINSW -- 16x8 signed min */
14917 if (have66noF2noF3(pfx) && sz == 2) {
14918 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14919 "pminsw", Iop_Min16Sx8, False );
14920 goto decode_success;
14922 break;
14924 case 0xEB:
14925 /* 66 0F EB = POR */
14926 if (have66noF2noF3(pfx) && sz == 2) {
14927 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "por", Iop_OrV128 );
14928 goto decode_success;
14930 break;
14932 case 0xEC:
14933 /* 66 0F EC = PADDSB */
14934 if (have66noF2noF3(pfx) && sz == 2) {
14935 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14936 "paddsb", Iop_QAdd8Sx16, False );
14937 goto decode_success;
14939 break;
14941 case 0xED:
14942 /* 66 0F ED = PADDSW */
14943 if (have66noF2noF3(pfx) && sz == 2) {
14944 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14945 "paddsw", Iop_QAdd16Sx8, False );
14946 goto decode_success;
14948 break;
14950 case 0xEE:
14951 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14952 /* 0F EE = PMAXSW -- 16x4 signed max */
14953 if (haveNo66noF2noF3(pfx) && sz == 4) {
14954 do_MMX_preamble();
14955 delta = dis_MMXop_regmem_to_reg (
14956 vbi, pfx, delta, opc, "pmaxsw", False );
14957 goto decode_success;
14959 /* 66 0F EE = PMAXSW -- 16x8 signed max */
14960 if (have66noF2noF3(pfx) && sz == 2) {
14961 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14962 "pmaxsw", Iop_Max16Sx8, False );
14963 goto decode_success;
14965 break;
14967 case 0xEF:
14968 /* 66 0F EF = PXOR */
14969 if (have66noF2noF3(pfx) && sz == 2) {
14970 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pxor", Iop_XorV128 );
14971 goto decode_success;
14973 break;
14975 case 0xF1:
14976 /* 66 0F F1 = PSLLW by E */
14977 if (have66noF2noF3(pfx) && sz == 2) {
14978 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllw", Iop_ShlN16x8 );
14979 goto decode_success;
14981 break;
14983 case 0xF2:
14984 /* 66 0F F2 = PSLLD by E */
14985 if (have66noF2noF3(pfx) && sz == 2) {
14986 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "pslld", Iop_ShlN32x4 );
14987 goto decode_success;
14989 break;
14991 case 0xF3:
14992 /* 66 0F F3 = PSLLQ by E */
14993 if (have66noF2noF3(pfx) && sz == 2) {
14994 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllq", Iop_ShlN64x2 );
14995 goto decode_success;
14997 break;
14999 case 0xF4:
15000 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
15001 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
15002 half */
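      /* A worked sketch of the lane arithmetic, for reference (hedged;
         not taken from the manual text above): with 32-bit lanes
         numbered 3..0 in d (the G register) and s (the E operand),
             res[ 63:0 ] = (ULong)d.lane0 * (ULong)s.lane0
             res[127:64] = (ULong)d.lane2 * (ULong)s.lane2
         Lanes 1 and 3 of both operands are ignored, and each unsigned
         product fits in 64 bits, so nothing is truncated. */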
15003 if (have66noF2noF3(pfx) && sz == 2) {
15004 IRTemp sV = newTemp(Ity_V128);
15005 IRTemp dV = newTemp(Ity_V128);
15006 modrm = getUChar(delta);
15007 UInt rG = gregOfRexRM(pfx,modrm);
15008 assign( dV, getXMMReg(rG) );
15009 if (epartIsReg(modrm)) {
15010 UInt rE = eregOfRexRM(pfx,modrm);
15011 assign( sV, getXMMReg(rE) );
15012 delta += 1;
15013 DIP("pmuludq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15014 } else {
15015 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15016 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15017 delta += alen;
15018 DIP("pmuludq %s,%s\n", dis_buf, nameXMMReg(rG));
15020 putXMMReg( rG, mkexpr(math_PMULUDQ_128( sV, dV )) );
15021 goto decode_success;
15023 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
15024 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
15025 0 to form 64-bit result */
15026 if (haveNo66noF2noF3(pfx) && sz == 4) {
15027 IRTemp sV = newTemp(Ity_I64);
15028 IRTemp dV = newTemp(Ity_I64);
15029 t1 = newTemp(Ity_I32);
15030 t0 = newTemp(Ity_I32);
15031 modrm = getUChar(delta);
15033 do_MMX_preamble();
15034 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15036 if (epartIsReg(modrm)) {
15037 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15038 delta += 1;
15039 DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
15040 nameMMXReg(gregLO3ofRM(modrm)));
15041 } else {
15042 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15043 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15044 delta += alen;
15045 DIP("pmuludq %s,%s\n", dis_buf,
15046 nameMMXReg(gregLO3ofRM(modrm)));
15049 assign( t0, unop(Iop_64to32, mkexpr(dV)) );
15050 assign( t1, unop(Iop_64to32, mkexpr(sV)) );
15051 putMMXReg( gregLO3ofRM(modrm),
15052 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
15053 goto decode_success;
15055 break;
15057 case 0xF5:
15058 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
15059 E(xmm or mem) to G(xmm) */
15060 if (have66noF2noF3(pfx) && sz == 2) {
15061 IRTemp sV = newTemp(Ity_V128);
15062 IRTemp dV = newTemp(Ity_V128);
15063 modrm = getUChar(delta);
15064 UInt rG = gregOfRexRM(pfx,modrm);
15065 if (epartIsReg(modrm)) {
15066 UInt rE = eregOfRexRM(pfx,modrm);
15067 assign( sV, getXMMReg(rE) );
15068 delta += 1;
15069 DIP("pmaddwd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15070 } else {
15071 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15072 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15073 delta += alen;
15074 DIP("pmaddwd %s,%s\n", dis_buf, nameXMMReg(rG));
15076 assign( dV, getXMMReg(rG) );
15077 putXMMReg( rG, mkexpr(math_PMADDWD_128(dV, sV)) );
15078 goto decode_success;
15080 break;
15082 case 0xF6:
15083 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
15084 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
15085 if (haveNo66noF2noF3(pfx) && sz == 4) {
15086 do_MMX_preamble();
15087 delta = dis_MMXop_regmem_to_reg (
15088 vbi, pfx, delta, opc, "psadbw", False );
15089 goto decode_success;
15091 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
15092 from E(xmm or mem) to G(xmm) */
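      /* Reference sketch for one 64-bit half of the result (assuming
         the usual PSADBW definition; math_PSADBW_128 below is the
         authoritative IR):
             UShort sum = 0;
             for (i = 0; i < 8; i++)
                sum += (d.byte[i] >= s.byte[i]) ? d.byte[i] - s.byte[i]
                                                : s.byte[i] - d.byte[i];
             res.half64 = (ULong)sum;   // hence "48 zeroes ++ u16"
      */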
15093 if (have66noF2noF3(pfx) && sz == 2) {
15094 IRTemp sV = newTemp(Ity_V128);
15095 IRTemp dV = newTemp(Ity_V128);
15096 modrm = getUChar(delta);
15097 UInt rG = gregOfRexRM(pfx,modrm);
15098 if (epartIsReg(modrm)) {
15099 UInt rE = eregOfRexRM(pfx,modrm);
15100 assign( sV, getXMMReg(rE) );
15101 delta += 1;
15102 DIP("psadbw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15103 } else {
15104 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15105 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15106 delta += alen;
15107 DIP("psadbw %s,%s\n", dis_buf, nameXMMReg(rG));
15109 assign( dV, getXMMReg(rG) );
15110 putXMMReg( rG, mkexpr( math_PSADBW_128 ( dV, sV ) ) );
15112 goto decode_success;
15114 break;
15116 case 0xF7:
15117 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
15118 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
15119 if (haveNo66noF2noF3(pfx) && sz == 4) {
15120 Bool ok = False;
15121 delta = dis_MMX( &ok, vbi, pfx, sz, delta-1 );
15122 if (ok) goto decode_success;
15124 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
15125 if (have66noF2noF3(pfx) && sz == 2 && epartIsReg(getUChar(delta))) {
15126 delta = dis_MASKMOVDQU( vbi, pfx, delta, False/*!isAvx*/ );
15127 goto decode_success;
15129 break;
15131 case 0xF8:
15132 /* 66 0F F8 = PSUBB */
15133 if (have66noF2noF3(pfx) && sz == 2) {
15134 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15135 "psubb", Iop_Sub8x16, False );
15136 goto decode_success;
15138 break;
15140 case 0xF9:
15141 /* 66 0F F9 = PSUBW */
15142 if (have66noF2noF3(pfx) && sz == 2) {
15143 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15144 "psubw", Iop_Sub16x8, False );
15145 goto decode_success;
15147 break;
15149 case 0xFA:
15150 /* 66 0F FA = PSUBD */
15151 if (have66noF2noF3(pfx) && sz == 2) {
15152 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15153 "psubd", Iop_Sub32x4, False );
15154 goto decode_success;
15156 break;
15158 case 0xFB:
15159 /* 66 0F FB = PSUBQ */
15160 if (have66noF2noF3(pfx) && sz == 2) {
15161 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15162 "psubq", Iop_Sub64x2, False );
15163 goto decode_success;
15165 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
15166 /* 0F FB = PSUBQ -- sub 64x1 */
15167 if (haveNo66noF2noF3(pfx) && sz == 4) {
15168 do_MMX_preamble();
15169 delta = dis_MMXop_regmem_to_reg (
15170 vbi, pfx, delta, opc, "psubq", False );
15171 goto decode_success;
15173 break;
15175 case 0xFC:
15176 /* 66 0F FC = PADDB */
15177 if (have66noF2noF3(pfx) && sz == 2) {
15178 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15179 "paddb", Iop_Add8x16, False );
15180 goto decode_success;
15182 break;
15184 case 0xFD:
15185 /* 66 0F FD = PADDW */
15186 if (have66noF2noF3(pfx) && sz == 2) {
15187 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15188 "paddw", Iop_Add16x8, False );
15189 goto decode_success;
15191 break;
15193 case 0xFE:
15194 /* 66 0F FE = PADDD */
15195 if (have66noF2noF3(pfx) && sz == 2) {
15196 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15197 "paddd", Iop_Add32x4, False );
15198 goto decode_success;
15200 break;
15202 default:
15203 goto decode_failure;
15207 decode_failure:
15208 *decode_OK = False;
15209 return deltaIN;
15211 decode_success:
15212 *decode_OK = True;
15213 return delta;
15217 /*------------------------------------------------------------*/
15218 /*--- ---*/
15219 /*--- Top-level SSE3 (not SupSSE3): dis_ESC_0F__SSE3 ---*/
15220 /*--- ---*/
15221 /*------------------------------------------------------------*/
15223 static Long dis_MOVDDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
15224 Long delta, Bool isAvx )
15226 IRTemp addr = IRTemp_INVALID;
15227 Int alen = 0;
15228 HChar dis_buf[50];
15229 IRTemp sV = newTemp(Ity_V128);
15230 IRTemp d0 = newTemp(Ity_I64);
15231 UChar modrm = getUChar(delta);
15232 UInt rG = gregOfRexRM(pfx,modrm);
15233 if (epartIsReg(modrm)) {
15234 UInt rE = eregOfRexRM(pfx,modrm);
15235 assign( sV, getXMMReg(rE) );
15236 DIP("%smovddup %s,%s\n",
15237 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
15238 delta += 1;
15239 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
15240 } else {
15241 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15242 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
15243 DIP("%smovddup %s,%s\n",
15244 isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
15245 delta += alen;
15247 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15248 ( rG, binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
15249 return delta;
15253 static Long dis_MOVDDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
15254 Long delta )
15256 IRTemp addr = IRTemp_INVALID;
15257 Int alen = 0;
15258 HChar dis_buf[50];
15259 IRTemp d0 = newTemp(Ity_I64);
15260 IRTemp d1 = newTemp(Ity_I64);
15261 UChar modrm = getUChar(delta);
15262 UInt rG = gregOfRexRM(pfx,modrm);
15263 if (epartIsReg(modrm)) {
15264 UInt rE = eregOfRexRM(pfx,modrm);
15265 DIP("vmovddup %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
15266 delta += 1;
15267 assign ( d0, getYMMRegLane64(rE, 0) );
15268 assign ( d1, getYMMRegLane64(rE, 2) );
15269 } else {
15270 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15271 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
15272 assign( d1, loadLE(Ity_I64, binop(Iop_Add64,
15273 mkexpr(addr), mkU64(16))) );
15274 DIP("vmovddup %s,%s\n", dis_buf, nameYMMReg(rG));
15275 delta += alen;
15277 putYMMRegLane64( rG, 0, mkexpr(d0) );
15278 putYMMRegLane64( rG, 1, mkexpr(d0) );
15279 putYMMRegLane64( rG, 2, mkexpr(d1) );
15280 putYMMRegLane64( rG, 3, mkexpr(d1) );
15281 return delta;
15285 static Long dis_MOVSxDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
15286 Long delta, Bool isAvx, Bool isL )
15288 IRTemp addr = IRTemp_INVALID;
15289 Int alen = 0;
15290 HChar dis_buf[50];
15291 IRTemp sV = newTemp(Ity_V128);
15292 UChar modrm = getUChar(delta);
15293 UInt rG = gregOfRexRM(pfx,modrm);
15294 IRTemp s3, s2, s1, s0;
15295 s3 = s2 = s1 = s0 = IRTemp_INVALID;
15296 if (epartIsReg(modrm)) {
15297 UInt rE = eregOfRexRM(pfx,modrm);
15298 assign( sV, getXMMReg(rE) );
15299 DIP("%smovs%cdup %s,%s\n",
15300 isAvx ? "v" : "", isL ? 'l' : 'h', nameXMMReg(rE), nameXMMReg(rG));
15301 delta += 1;
15302 } else {
15303 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15304 if (!isAvx)
15305 gen_SEGV_if_not_16_aligned( addr );
15306 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15307 DIP("%smovs%cdup %s,%s\n",
15308 isAvx ? "v" : "", isL ? 'l' : 'h', dis_buf, nameXMMReg(rG));
15309 delta += alen;
15311 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
15312 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15313 ( rG, isL ? mkV128from32s( s2, s2, s0, s0 )
15314 : mkV128from32s( s3, s3, s1, s1 ) );
15315 return delta;
15319 static Long dis_MOVSxDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
15320 Long delta, Bool isL )
15322 IRTemp addr = IRTemp_INVALID;
15323 Int alen = 0;
15324 HChar dis_buf[50];
15325 IRTemp sV = newTemp(Ity_V256);
15326 UChar modrm = getUChar(delta);
15327 UInt rG = gregOfRexRM(pfx,modrm);
15328 IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
15329 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
15330 if (epartIsReg(modrm)) {
15331 UInt rE = eregOfRexRM(pfx,modrm);
15332 assign( sV, getYMMReg(rE) );
15333 DIP("vmovs%cdup %s,%s\n",
15334 isL ? 'l' : 'h', nameYMMReg(rE), nameYMMReg(rG));
15335 delta += 1;
15336 } else {
15337 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15338 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
15339 DIP("vmovs%cdup %s,%s\n",
15340 isL ? 'l' : 'h', dis_buf, nameYMMReg(rG));
15341 delta += alen;
15343 breakupV256to32s( sV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
15344 putYMMRegLane128( rG, 1, isL ? mkV128from32s( s6, s6, s4, s4 )
15345 : mkV128from32s( s7, s7, s5, s5 ) );
15346 putYMMRegLane128( rG, 0, isL ? mkV128from32s( s2, s2, s0, s0 )
15347 : mkV128from32s( s3, s3, s1, s1 ) );
15348 return delta;
15352 static IRTemp math_HADDPS_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
15354 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
15355 IRTemp leftV = newTemp(Ity_V128);
15356 IRTemp rightV = newTemp(Ity_V128);
15357 IRTemp rm = newTemp(Ity_I32);
15358 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
15360 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
15361 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
15363 assign( leftV, mkV128from32s( s2, s0, d2, d0 ) );
15364 assign( rightV, mkV128from32s( s3, s1, d3, d1 ) );
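   /* Assuming mkV128from32s(x3,x2,x1,x0) orders lanes high-to-low, the
      lane-wise op below produces, from lane 3 down to lane 0,
         add: [ s3+s2, s1+s0, d3+d2, d1+d0 ]
         sub: [ s2-s3, s0-s1, d2-d3, d0-d1 ]
      which matches the architected HADDPS/HSUBPS results. */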
15366 IRTemp res = newTemp(Ity_V128);
15367 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
15368 assign( res, triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
15369 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
15370 return res;
15374 static IRTemp math_HADDPD_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
15376 IRTemp s1, s0, d1, d0;
15377 IRTemp leftV = newTemp(Ity_V128);
15378 IRTemp rightV = newTemp(Ity_V128);
15379 IRTemp rm = newTemp(Ity_I32);
15380 s1 = s0 = d1 = d0 = IRTemp_INVALID;
15382 breakupV128to64s( sV, &s1, &s0 );
15383 breakupV128to64s( dV, &d1, &d0 );
15385 assign( leftV, binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
15386 assign( rightV, binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
15388 IRTemp res = newTemp(Ity_V128);
15389 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
15390 assign( res, triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
15391 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
15392 return res;
15396 __attribute__((noinline))
15397 static
15398 Long dis_ESC_0F__SSE3 ( Bool* decode_OK,
15399 const VexAbiInfo* vbi,
15400 Prefix pfx, Int sz, Long deltaIN )
15402 IRTemp addr = IRTemp_INVALID;
15403 UChar modrm = 0;
15404 Int alen = 0;
15405 HChar dis_buf[50];
15407 *decode_OK = False;
15409 Long delta = deltaIN;
15410 UChar opc = getUChar(delta);
15411 delta++;
15412 switch (opc) {
15414 case 0x12:
15415 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
15416 duplicating some lanes (2:2:0:0). */
15417 if (haveF3no66noF2(pfx) && sz == 4) {
15418 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
15419 True/*isL*/ );
15420 goto decode_success;
15422 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
15423 duplicating some lanes (0:1:0:1). */
15424 if (haveF2no66noF3(pfx)
15425 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
15426 delta = dis_MOVDDUP_128( vbi, pfx, delta, False/*!isAvx*/ );
15427 goto decode_success;
15429 break;
15431 case 0x16:
15432 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
15433 duplicating some lanes (3:3:1:1). */
15434 if (haveF3no66noF2(pfx) && sz == 4) {
15435 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
15436 False/*!isL*/ );
15437 goto decode_success;
15439 break;
15441 case 0x7C:
15442 case 0x7D:
15443 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
15444 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
15445 if (haveF2no66noF3(pfx) && sz == 4) {
15446 IRTemp eV = newTemp(Ity_V128);
15447 IRTemp gV = newTemp(Ity_V128);
15448 Bool isAdd = opc == 0x7C;
15449 const HChar* str = isAdd ? "add" : "sub";
15450 modrm = getUChar(delta);
15451 UInt rG = gregOfRexRM(pfx,modrm);
15452 if (epartIsReg(modrm)) {
15453 UInt rE = eregOfRexRM(pfx,modrm);
15454 assign( eV, getXMMReg(rE) );
15455 DIP("h%sps %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
15456 delta += 1;
15457 } else {
15458 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15459 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15460 DIP("h%sps %s,%s\n", str, dis_buf, nameXMMReg(rG));
15461 delta += alen;
15464 assign( gV, getXMMReg(rG) );
15465 putXMMReg( rG, mkexpr( math_HADDPS_128 ( gV, eV, isAdd ) ) );
15466 goto decode_success;
15468 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
15469 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
15470 if (have66noF2noF3(pfx) && sz == 2) {
15471 IRTemp eV = newTemp(Ity_V128);
15472 IRTemp gV = newTemp(Ity_V128);
15473 Bool isAdd = opc == 0x7C;
15474 const HChar* str = isAdd ? "add" : "sub";
15475 modrm = getUChar(delta);
15476 UInt rG = gregOfRexRM(pfx,modrm);
15477 if (epartIsReg(modrm)) {
15478 UInt rE = eregOfRexRM(pfx,modrm);
15479 assign( eV, getXMMReg(rE) );
15480 DIP("h%spd %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
15481 delta += 1;
15482 } else {
15483 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15484 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15485 DIP("h%spd %s,%s\n", str, dis_buf, nameXMMReg(rG));
15486 delta += alen;
15489 assign( gV, getXMMReg(rG) );
15490 putXMMReg( rG, mkexpr( math_HADDPD_128 ( gV, eV, isAdd ) ) );
15491 goto decode_success;
15493 break;
15495 case 0xD0:
15496       /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
15497 if (have66noF2noF3(pfx) && sz == 2) {
15498 IRTemp eV = newTemp(Ity_V128);
15499 IRTemp gV = newTemp(Ity_V128);
15500 modrm = getUChar(delta);
15501 UInt rG = gregOfRexRM(pfx,modrm);
15502 if (epartIsReg(modrm)) {
15503 UInt rE = eregOfRexRM(pfx,modrm);
15504 assign( eV, getXMMReg(rE) );
15505 DIP("addsubpd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15506 delta += 1;
15507 } else {
15508 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15509 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15510 DIP("addsubpd %s,%s\n", dis_buf, nameXMMReg(rG));
15511 delta += alen;
15514 assign( gV, getXMMReg(rG) );
15515 putXMMReg( rG, mkexpr( math_ADDSUBPD_128 ( gV, eV ) ) );
15516 goto decode_success;
15518 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
15519 if (haveF2no66noF3(pfx) && sz == 4) {
15520 IRTemp eV = newTemp(Ity_V128);
15521 IRTemp gV = newTemp(Ity_V128);
15522 modrm = getUChar(delta);
15523 UInt rG = gregOfRexRM(pfx,modrm);
15526 if (epartIsReg(modrm)) {
15527 UInt rE = eregOfRexRM(pfx,modrm);
15528 assign( eV, getXMMReg(rE) );
15529 DIP("addsubps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15530 delta += 1;
15531 } else {
15532 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15533 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15534 DIP("addsubps %s,%s\n", dis_buf, nameXMMReg(rG));
15535 delta += alen;
15538 assign( gV, getXMMReg(rG) );
15539 putXMMReg( rG, mkexpr( math_ADDSUBPS_128 ( gV, eV ) ) );
15540 goto decode_success;
15542 break;
15544 case 0xF0:
15545 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
15546 if (haveF2no66noF3(pfx) && sz == 4) {
15547 modrm = getUChar(delta);
15548 if (epartIsReg(modrm)) {
15549 goto decode_failure;
15550 } else {
15551 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15552 putXMMReg( gregOfRexRM(pfx,modrm),
15553 loadLE(Ity_V128, mkexpr(addr)) );
15554 DIP("lddqu %s,%s\n", dis_buf,
15555 nameXMMReg(gregOfRexRM(pfx,modrm)));
15556 delta += alen;
15558 goto decode_success;
15560 break;
15562 default:
15563 goto decode_failure;
15567 decode_failure:
15568 *decode_OK = False;
15569 return deltaIN;
15571 decode_success:
15572 *decode_OK = True;
15573 return delta;
15577 /*------------------------------------------------------------*/
15578 /*--- ---*/
15579 /*--- Top-level SSSE3: dis_ESC_0F38__SupSSE3 ---*/
15580 /*--- ---*/
15581 /*------------------------------------------------------------*/
15583 static
15584 IRTemp math_PSHUFB_XMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
15586 IRTemp sHi = newTemp(Ity_I64);
15587 IRTemp sLo = newTemp(Ity_I64);
15588 IRTemp dHi = newTemp(Ity_I64);
15589 IRTemp dLo = newTemp(Ity_I64);
15590 IRTemp rHi = newTemp(Ity_I64);
15591 IRTemp rLo = newTemp(Ity_I64);
15592 IRTemp sevens = newTemp(Ity_I64);
15593 IRTemp mask0x80hi = newTemp(Ity_I64);
15594 IRTemp mask0x80lo = newTemp(Ity_I64);
15595 IRTemp maskBit3hi = newTemp(Ity_I64);
15596 IRTemp maskBit3lo = newTemp(Ity_I64);
15597 IRTemp sAnd7hi = newTemp(Ity_I64);
15598 IRTemp sAnd7lo = newTemp(Ity_I64);
15599 IRTemp permdHi = newTemp(Ity_I64);
15600 IRTemp permdLo = newTemp(Ity_I64);
15601 IRTemp res = newTemp(Ity_V128);
15603 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
15604 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
15605 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
15606 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
15608 assign( sevens, mkU64(0x0707070707070707ULL) );
15610 /* mask0x80hi = Not(SarN8x8(sHi,7))
15611 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
15612 sAnd7hi = And(sHi,sevens)
15613 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
15614 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
15615       rHi        = And(permdHi,mask0x80hi)  */
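   /* Per-byte reference model, for orientation (a sketch; the IR below
      is the authoritative version): 128-bit PSHUFB computes
         res.byte[i] = (s.byte[i] & 0x80) ? 0 : d.byte[ s.byte[i] & 0xF ];
      Working in 64-bit halves, bit 3 of each index selects dHi vs dLo
      (maskBit3hi), bits 2..0 index within the chosen half (sAnd7hi),
      and the 0x80 test becomes the final mask0x80hi. */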
15617 assign(
15618 mask0x80hi,
15619 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));
15621 assign(
15622 maskBit3hi,
15623 binop(Iop_SarN8x8,
15624 binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
15625 mkU8(7)));
15627 assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));
15629 assign(
15630 permdHi,
15631 binop(
15632 Iop_Or64,
15633 binop(Iop_And64,
15634 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
15635 mkexpr(maskBit3hi)),
15636 binop(Iop_And64,
15637 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
15638 unop(Iop_Not64,mkexpr(maskBit3hi))) ));
15640 assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );
15642 /* And the same for the lower half of the result. What fun. */
15644 assign(
15645 mask0x80lo,
15646 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));
15648 assign(
15649 maskBit3lo,
15650 binop(Iop_SarN8x8,
15651 binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
15652 mkU8(7)));
15654 assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));
15656 assign(
15657 permdLo,
15658 binop(
15659 Iop_Or64,
15660 binop(Iop_And64,
15661 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
15662 mkexpr(maskBit3lo)),
15663 binop(Iop_And64,
15664 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
15665 unop(Iop_Not64,mkexpr(maskBit3lo))) ));
15667 assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );
15669 assign(res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
15670 return res;
15674 static
15675 IRTemp math_PSHUFB_YMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
15677 IRTemp sHi, sLo, dHi, dLo;
15678 sHi = sLo = dHi = dLo = IRTemp_INVALID;
15679 breakupV256toV128s( dV, &dHi, &dLo);
15680 breakupV256toV128s( sV, &sHi, &sLo);
15681 IRTemp res = newTemp(Ity_V256);
15682 assign(res, binop(Iop_V128HLtoV256,
15683 mkexpr(math_PSHUFB_XMM(dHi, sHi)),
15684 mkexpr(math_PSHUFB_XMM(dLo, sLo))));
15685 return res;
15689 static Long dis_PHADD_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
15690 Bool isAvx, UChar opc )
15692 IRTemp addr = IRTemp_INVALID;
15693 Int alen = 0;
15694 HChar dis_buf[50];
15695 const HChar* str = "???";
15696 IROp opV64 = Iop_INVALID;
15697 IROp opCatO = Iop_CatOddLanes16x4;
15698 IROp opCatE = Iop_CatEvenLanes16x4;
15699 IRTemp sV = newTemp(Ity_V128);
15700 IRTemp dV = newTemp(Ity_V128);
15701 IRTemp sHi = newTemp(Ity_I64);
15702 IRTemp sLo = newTemp(Ity_I64);
15703 IRTemp dHi = newTemp(Ity_I64);
15704 IRTemp dLo = newTemp(Ity_I64);
15705 UChar modrm = getUChar(delta);
15706 UInt rG = gregOfRexRM(pfx,modrm);
15707 UInt rV = isAvx ? getVexNvvvv(pfx) : rG;
15709 switch (opc) {
15710 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15711 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15712 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15713 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15714 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15715 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15716 default: vassert(0);
15718 if (opc == 0x02 || opc == 0x06) {
15719 opCatO = Iop_InterleaveHI32x2;
15720 opCatE = Iop_InterleaveLO32x2;
15723 assign( dV, getXMMReg(rV) );
15725 if (epartIsReg(modrm)) {
15726 UInt rE = eregOfRexRM(pfx,modrm);
15727 assign( sV, getXMMReg(rE) );
15728 DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
15729 nameXMMReg(rE), nameXMMReg(rG));
15730 delta += 1;
15731 } else {
15732 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15733 if (!isAvx)
15734 gen_SEGV_if_not_16_aligned( addr );
15735 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15736 DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
15737 dis_buf, nameXMMReg(rG));
15738 delta += alen;
15741 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
15742 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
15743 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
15744 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
15746 /* This isn't a particularly efficient way to compute the
15747 result, but at least it avoids a proliferation of IROps,
15748          hence avoids complicating all the backends. */
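   /* Sketch of why the Cat trick gives the right pairing (hedged
      reading of the lane ops): CatEvenLanes16x4(hi,lo) gathers lanes
      0 and 2 of each half and CatOddLanes16x4 gathers lanes 1 and 3,
      so applying opV64 to the two gathered vectors combines lane 2k
      with lane 2k+1.  E.g. for PHADDW the upper 64 bits of the result
      become [ s7+s6, s5+s4, s3+s2, s1+s0 ] and the lower 64 bits the
      analogous sums over the d lanes. */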
15750 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15751 ( rG,
15752 binop(Iop_64HLtoV128,
15753 binop(opV64,
15754 binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
15755 binop(opCatO,mkexpr(sHi),mkexpr(sLo)) ),
15756 binop(opV64,
15757 binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
15758 binop(opCatO,mkexpr(dHi),mkexpr(dLo)) ) ) );
15759 return delta;
15763 static Long dis_PHADD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
15764 UChar opc )
15766 IRTemp addr = IRTemp_INVALID;
15767 Int alen = 0;
15768 HChar dis_buf[50];
15769 const HChar* str = "???";
15770 IROp opV64 = Iop_INVALID;
15771 IROp opCatO = Iop_CatOddLanes16x4;
15772 IROp opCatE = Iop_CatEvenLanes16x4;
15773 IRTemp sV = newTemp(Ity_V256);
15774 IRTemp dV = newTemp(Ity_V256);
15775 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
15776 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
15777 UChar modrm = getUChar(delta);
15778 UInt rG = gregOfRexRM(pfx,modrm);
15779 UInt rV = getVexNvvvv(pfx);
15781 switch (opc) {
15782 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15783 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15784 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15785 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15786 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15787 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15788 default: vassert(0);
15790 if (opc == 0x02 || opc == 0x06) {
15791 opCatO = Iop_InterleaveHI32x2;
15792 opCatE = Iop_InterleaveLO32x2;
15795 assign( dV, getYMMReg(rV) );
15797 if (epartIsReg(modrm)) {
15798 UInt rE = eregOfRexRM(pfx,modrm);
15799 assign( sV, getYMMReg(rE) );
15800 DIP("vph%s %s,%s\n", str, nameYMMReg(rE), nameYMMReg(rG));
15801 delta += 1;
15802 } else {
15803 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15804 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
15805 DIP("vph%s %s,%s\n", str, dis_buf, nameYMMReg(rG));
15806 delta += alen;
15809 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
15810 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
15812 /* This isn't a particularly efficient way to compute the
15813 result, but at least it avoids a proliferation of IROps,
15814          hence avoids complicating all the backends. */
15816 putYMMReg( rG,
15817 binop(Iop_V128HLtoV256,
15818 binop(Iop_64HLtoV128,
15819 binop(opV64,
15820 binop(opCatE,mkexpr(s3),mkexpr(s2)),
15821 binop(opCatO,mkexpr(s3),mkexpr(s2)) ),
15822 binop(opV64,
15823 binop(opCatE,mkexpr(d3),mkexpr(d2)),
15824 binop(opCatO,mkexpr(d3),mkexpr(d2)) ) ),
15825 binop(Iop_64HLtoV128,
15826 binop(opV64,
15827 binop(opCatE,mkexpr(s1),mkexpr(s0)),
15828 binop(opCatO,mkexpr(s1),mkexpr(s0)) ),
15829 binop(opV64,
15830 binop(opCatE,mkexpr(d1),mkexpr(d0)),
15831 binop(opCatO,mkexpr(d1),mkexpr(d0)) ) ) ) );
15832 return delta;
15836 static IRTemp math_PMADDUBSW_128 ( IRTemp dV, IRTemp sV )
15838 IRTemp sVoddsSX = newTemp(Ity_V128);
15839 IRTemp sVevensSX = newTemp(Ity_V128);
15840 IRTemp dVoddsZX = newTemp(Ity_V128);
15841 IRTemp dVevensZX = newTemp(Ity_V128);
15842 /* compute dV unsigned x sV signed */
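   /* Sketch of the shift trick used here (assuming the obvious
      per-lane semantics): SarN16x8(v,8) sign-extends the high byte of
      each 16-bit lane, while ShrN16x8(ShlN16x8(v,8),8) zero-extends
      the low byte.  The two Mul16x8 products are then combined with a
      signed saturating add, matching PMADDUBSW's u8 x s8 -> saturated
      s16 per-pair result. */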
15843 assign( sVoddsSX, binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
15844 assign( sVevensSX, binop(Iop_SarN16x8,
15845 binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
15846 mkU8(8)) );
15847 assign( dVoddsZX, binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
15848 assign( dVevensZX, binop(Iop_ShrN16x8,
15849 binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
15850 mkU8(8)) );
15852 IRTemp res = newTemp(Ity_V128);
15853 assign( res, binop(Iop_QAdd16Sx8,
15854 binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
15855 binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
15858 return res;
15862 static
15863 IRTemp math_PMADDUBSW_256 ( IRTemp dV, IRTemp sV )
15865 IRTemp sHi, sLo, dHi, dLo;
15866 sHi = sLo = dHi = dLo = IRTemp_INVALID;
15867 breakupV256toV128s( dV, &dHi, &dLo);
15868 breakupV256toV128s( sV, &sHi, &sLo);
15869 IRTemp res = newTemp(Ity_V256);
15870 assign(res, binop(Iop_V128HLtoV256,
15871 mkexpr(math_PMADDUBSW_128(dHi, sHi)),
15872 mkexpr(math_PMADDUBSW_128(dLo, sLo))));
15873 return res;
15877 __attribute__((noinline))
15878 static
15879 Long dis_ESC_0F38__SupSSE3 ( Bool* decode_OK,
15880 const VexAbiInfo* vbi,
15881 Prefix pfx, Int sz, Long deltaIN )
15883 IRTemp addr = IRTemp_INVALID;
15884 UChar modrm = 0;
15885 Int alen = 0;
15886 HChar dis_buf[50];
15888 *decode_OK = False;
15890 Long delta = deltaIN;
15891 UChar opc = getUChar(delta);
15892 delta++;
15893 switch (opc) {
15895 case 0x00:
15896 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
15897 if (have66noF2noF3(pfx)
15898 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15899 IRTemp sV = newTemp(Ity_V128);
15900 IRTemp dV = newTemp(Ity_V128);
15902 modrm = getUChar(delta);
15903 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
15905 if (epartIsReg(modrm)) {
15906 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
15907 delta += 1;
15908 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
15909 nameXMMReg(gregOfRexRM(pfx,modrm)));
15910 } else {
15911 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15912 gen_SEGV_if_not_16_aligned( addr );
15913 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15914 delta += alen;
15915 DIP("pshufb %s,%s\n", dis_buf,
15916 nameXMMReg(gregOfRexRM(pfx,modrm)));
15919 IRTemp res = math_PSHUFB_XMM( dV, sV );
15920 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(res));
15921 goto decode_success;
15923 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
15924 if (haveNo66noF2noF3(pfx) && sz == 4) {
15925 IRTemp sV = newTemp(Ity_I64);
15926 IRTemp dV = newTemp(Ity_I64);
15928 modrm = getUChar(delta);
15929 do_MMX_preamble();
15930 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15932 if (epartIsReg(modrm)) {
15933 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15934 delta += 1;
15935 DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
15936 nameMMXReg(gregLO3ofRM(modrm)));
15937 } else {
15938 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15939 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15940 delta += alen;
15941 DIP("pshufb %s,%s\n", dis_buf,
15942 nameMMXReg(gregLO3ofRM(modrm)));
15945 putMMXReg(
15946 gregLO3ofRM(modrm),
15947 binop(
15948 Iop_And64,
15949 /* permute the lanes */
15950 binop(
15951 Iop_Perm8x8,
15952 mkexpr(dV),
15953 binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
15955 /* mask off lanes which have (index & 0x80) == 0x80 */
15956 unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
15959 goto decode_success;
15961 break;
15963 case 0x01:
15964 case 0x02:
15965 case 0x03:
15966 case 0x05:
15967 case 0x06:
15968 case 0x07:
15969 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
15970 G to G (xmm). */
15971 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
15972 G to G (xmm). */
15973 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
15974 xmm) and G to G (xmm). */
15975 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
15976 G to G (xmm). */
15977 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
15978 G to G (xmm). */
15979 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
15980 xmm) and G to G (xmm). */
15981 if (have66noF2noF3(pfx)
15982 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15983 delta = dis_PHADD_128( vbi, pfx, delta, False/*isAvx*/, opc );
15984 goto decode_success;
15986 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
15987 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
15988 to G (mmx). */
15989 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
15990 to G (mmx). */
15991 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
15992 mmx) and G to G (mmx). */
15993 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
15994 to G (mmx). */
15995 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
15996 to G (mmx). */
15997 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
15998 mmx) and G to G (mmx). */
15999 if (haveNo66noF2noF3(pfx) && sz == 4) {
16000 const HChar* str = "???";
16001 IROp opV64 = Iop_INVALID;
16002 IROp opCatO = Iop_CatOddLanes16x4;
16003 IROp opCatE = Iop_CatEvenLanes16x4;
16004 IRTemp sV = newTemp(Ity_I64);
16005 IRTemp dV = newTemp(Ity_I64);
16007 modrm = getUChar(delta);
16009 switch (opc) {
16010 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
16011 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
16012 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
16013 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
16014 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
16015 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
16016 default: vassert(0);
16018 if (opc == 0x02 || opc == 0x06) {
16019 opCatO = Iop_InterleaveHI32x2;
16020 opCatE = Iop_InterleaveLO32x2;
16023 do_MMX_preamble();
16024 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16026 if (epartIsReg(modrm)) {
16027 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16028 delta += 1;
16029 DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
16030 nameMMXReg(gregLO3ofRM(modrm)));
16031 } else {
16032 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16033 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16034 delta += alen;
16035 DIP("ph%s %s,%s\n", str, dis_buf,
16036 nameMMXReg(gregLO3ofRM(modrm)));
16039 putMMXReg(
16040 gregLO3ofRM(modrm),
16041 binop(opV64,
16042 binop(opCatE,mkexpr(sV),mkexpr(dV)),
16043 binop(opCatO,mkexpr(sV),mkexpr(dV))
16046 goto decode_success;
16048 break;
16050 case 0x04:
16051 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
16052 Unsigned Bytes (XMM) */
16053 if (have66noF2noF3(pfx)
16054 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16055 IRTemp sV = newTemp(Ity_V128);
16056 IRTemp dV = newTemp(Ity_V128);
16057 modrm = getUChar(delta);
16058 UInt rG = gregOfRexRM(pfx,modrm);
16060 assign( dV, getXMMReg(rG) );
16062 if (epartIsReg(modrm)) {
16063 UInt rE = eregOfRexRM(pfx,modrm);
16064 assign( sV, getXMMReg(rE) );
16065 delta += 1;
16066 DIP("pmaddubsw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
16067 } else {
16068 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16069 gen_SEGV_if_not_16_aligned( addr );
16070 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16071 delta += alen;
16072 DIP("pmaddubsw %s,%s\n", dis_buf, nameXMMReg(rG));
16075 putXMMReg( rG, mkexpr( math_PMADDUBSW_128( dV, sV ) ) );
16076 goto decode_success;
16078 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
16079 Unsigned Bytes (MMX) */
16080 if (haveNo66noF2noF3(pfx) && sz == 4) {
16081 IRTemp sV = newTemp(Ity_I64);
16082 IRTemp dV = newTemp(Ity_I64);
16083 IRTemp sVoddsSX = newTemp(Ity_I64);
16084 IRTemp sVevensSX = newTemp(Ity_I64);
16085 IRTemp dVoddsZX = newTemp(Ity_I64);
16086 IRTemp dVevensZX = newTemp(Ity_I64);
16088 modrm = getUChar(delta);
16089 do_MMX_preamble();
16090 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16092 if (epartIsReg(modrm)) {
16093 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16094 delta += 1;
16095 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
16096 nameMMXReg(gregLO3ofRM(modrm)));
16097 } else {
16098 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16099 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16100 delta += alen;
16101 DIP("pmaddubsw %s,%s\n", dis_buf,
16102 nameMMXReg(gregLO3ofRM(modrm)));
16105 /* compute dV unsigned x sV signed */
16106 assign( sVoddsSX,
16107 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
16108 assign( sVevensSX,
16109 binop(Iop_SarN16x4,
16110 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
16111 mkU8(8)) );
16112 assign( dVoddsZX,
16113 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
16114 assign( dVevensZX,
16115 binop(Iop_ShrN16x4,
16116 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
16117 mkU8(8)) );
16119 putMMXReg(
16120 gregLO3ofRM(modrm),
16121 binop(Iop_QAdd16Sx4,
16122 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
16123 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
16126 goto decode_success;
16128 break;
16130 case 0x08:
16131 case 0x09:
16132 case 0x0A:
16133 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
16134 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
16135 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
16136 if (have66noF2noF3(pfx)
16137 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16138 IRTemp sV = newTemp(Ity_V128);
16139 IRTemp dV = newTemp(Ity_V128);
16140 IRTemp sHi = newTemp(Ity_I64);
16141 IRTemp sLo = newTemp(Ity_I64);
16142 IRTemp dHi = newTemp(Ity_I64);
16143 IRTemp dLo = newTemp(Ity_I64);
16144 const HChar* str = "???";
16145 Int laneszB = 0;
16147 switch (opc) {
16148 case 0x08: laneszB = 1; str = "b"; break;
16149 case 0x09: laneszB = 2; str = "w"; break;
16150 case 0x0A: laneszB = 4; str = "d"; break;
16151 default: vassert(0);
16154 modrm = getUChar(delta);
16155 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
16157 if (epartIsReg(modrm)) {
16158 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16159 delta += 1;
16160 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
16161 nameXMMReg(gregOfRexRM(pfx,modrm)));
16162 } else {
16163 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16164 gen_SEGV_if_not_16_aligned( addr );
16165 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16166 delta += alen;
16167 DIP("psign%s %s,%s\n", str, dis_buf,
16168 nameXMMReg(gregOfRexRM(pfx,modrm)));
16171 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
16172 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
16173 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
16174 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
16176 putXMMReg(
16177 gregOfRexRM(pfx,modrm),
16178 binop(Iop_64HLtoV128,
16179 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
16180 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
16183 goto decode_success;
16185 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
16186 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
16187 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
16188 if (haveNo66noF2noF3(pfx) && sz == 4) {
16189 IRTemp sV = newTemp(Ity_I64);
16190 IRTemp dV = newTemp(Ity_I64);
16191 const HChar* str = "???";
16192 Int laneszB = 0;
16194 switch (opc) {
16195 case 0x08: laneszB = 1; str = "b"; break;
16196 case 0x09: laneszB = 2; str = "w"; break;
16197 case 0x0A: laneszB = 4; str = "d"; break;
16198 default: vassert(0);
16201 modrm = getUChar(delta);
16202 do_MMX_preamble();
16203 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16205 if (epartIsReg(modrm)) {
16206 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16207 delta += 1;
16208 DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
16209 nameMMXReg(gregLO3ofRM(modrm)));
16210 } else {
16211 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16212 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16213 delta += alen;
16214 DIP("psign%s %s,%s\n", str, dis_buf,
16215 nameMMXReg(gregLO3ofRM(modrm)));
16218 putMMXReg(
16219 gregLO3ofRM(modrm),
16220 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
16222 goto decode_success;
16224 break;
16226 case 0x0B:
16227 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
16228 Scale (XMM) */
16229 if (have66noF2noF3(pfx)
16230 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16231 IRTemp sV = newTemp(Ity_V128);
16232 IRTemp dV = newTemp(Ity_V128);
16233 IRTemp sHi = newTemp(Ity_I64);
16234 IRTemp sLo = newTemp(Ity_I64);
16235 IRTemp dHi = newTemp(Ity_I64);
16236 IRTemp dLo = newTemp(Ity_I64);
16238 modrm = getUChar(delta);
16239 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
16241 if (epartIsReg(modrm)) {
16242 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16243 delta += 1;
16244 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
16245 nameXMMReg(gregOfRexRM(pfx,modrm)));
16246 } else {
16247 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16248 gen_SEGV_if_not_16_aligned( addr );
16249 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16250 delta += alen;
16251 DIP("pmulhrsw %s,%s\n", dis_buf,
16252 nameXMMReg(gregOfRexRM(pfx,modrm)));
16255 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
16256 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
16257 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
16258 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
16260 putXMMReg(
16261 gregOfRexRM(pfx,modrm),
16262 binop(Iop_64HLtoV128,
16263 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
16264 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
16267 goto decode_success;
16269 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
16270 (MMX) */
16271 if (haveNo66noF2noF3(pfx) && sz == 4) {
16272 IRTemp sV = newTemp(Ity_I64);
16273 IRTemp dV = newTemp(Ity_I64);
16275 modrm = getUChar(delta);
16276 do_MMX_preamble();
16277 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16279 if (epartIsReg(modrm)) {
16280 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16281 delta += 1;
16282 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
16283 nameMMXReg(gregLO3ofRM(modrm)));
16284 } else {
16285 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16286 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16287 delta += alen;
16288 DIP("pmulhrsw %s,%s\n", dis_buf,
16289 nameMMXReg(gregLO3ofRM(modrm)));
16292 putMMXReg(
16293 gregLO3ofRM(modrm),
16294 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
16296 goto decode_success;
16298 break;
16300 case 0x1C:
16301 case 0x1D:
16302 case 0x1E:
16303 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
16304 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
16305 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
16306 if (have66noF2noF3(pfx)
16307 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16308 IRTemp sV = newTemp(Ity_V128);
16309 const HChar* str = "???";
16310 Int laneszB = 0;
16312 switch (opc) {
16313 case 0x1C: laneszB = 1; str = "b"; break;
16314 case 0x1D: laneszB = 2; str = "w"; break;
16315 case 0x1E: laneszB = 4; str = "d"; break;
16316 default: vassert(0);
16319 modrm = getUChar(delta);
16320 if (epartIsReg(modrm)) {
16321 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16322 delta += 1;
16323 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
16324 nameXMMReg(gregOfRexRM(pfx,modrm)));
16325 } else {
16326 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16327 gen_SEGV_if_not_16_aligned( addr );
16328 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16329 delta += alen;
16330 DIP("pabs%s %s,%s\n", str, dis_buf,
16331 nameXMMReg(gregOfRexRM(pfx,modrm)));
16334 putXMMReg( gregOfRexRM(pfx,modrm),
16335 mkexpr(math_PABS_XMM(sV, laneszB)) );
16336 goto decode_success;
16338 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
16339 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
16340 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
16341 if (haveNo66noF2noF3(pfx) && sz == 4) {
16342 IRTemp sV = newTemp(Ity_I64);
16343 const HChar* str = "???";
16344 Int laneszB = 0;
16346 switch (opc) {
16347 case 0x1C: laneszB = 1; str = "b"; break;
16348 case 0x1D: laneszB = 2; str = "w"; break;
16349 case 0x1E: laneszB = 4; str = "d"; break;
16350 default: vassert(0);
16353 modrm = getUChar(delta);
16354 do_MMX_preamble();
16356 if (epartIsReg(modrm)) {
16357 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16358 delta += 1;
16359 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
16360 nameMMXReg(gregLO3ofRM(modrm)));
16361 } else {
16362 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16363 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16364 delta += alen;
16365 DIP("pabs%s %s,%s\n", str, dis_buf,
16366 nameMMXReg(gregLO3ofRM(modrm)));
16369 putMMXReg( gregLO3ofRM(modrm),
16370 mkexpr(math_PABS_MMX( sV, laneszB )) );
16371 goto decode_success;
16373 break;
16375 default:
16376 break;
16380 //decode_failure:
16381 *decode_OK = False;
16382 return deltaIN;
16384 decode_success:
16385 *decode_OK = True;
16386 return delta;
16390 /*------------------------------------------------------------*/
16391 /*--- ---*/
16392 /*--- Top-level SSSE3: dis_ESC_0F3A__SupSSE3 ---*/
16393 /*--- ---*/
16394 /*------------------------------------------------------------*/
16396 __attribute__((noinline))
16397 static
16398 Long dis_ESC_0F3A__SupSSE3 ( Bool* decode_OK,
16399 const VexAbiInfo* vbi,
16400 Prefix pfx, Int sz, Long deltaIN )
16402 Long d64 = 0;
16403 IRTemp addr = IRTemp_INVALID;
16404 UChar modrm = 0;
16405 Int alen = 0;
16406 HChar dis_buf[50];
16408 *decode_OK = False;
16410 Long delta = deltaIN;
16411 UChar opc = getUChar(delta);
16412 delta++;
16413 switch (opc) {
16415 case 0x0F:
16416 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
16417 if (have66noF2noF3(pfx)
16418 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16419 IRTemp sV = newTemp(Ity_V128);
16420 IRTemp dV = newTemp(Ity_V128);
16422 modrm = getUChar(delta);
16423 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
16425 if (epartIsReg(modrm)) {
16426 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16427 d64 = (Long)getUChar(delta+1);
16428 delta += 1+1;
16429 DIP("palignr $%lld,%s,%s\n", d64,
16430 nameXMMReg(eregOfRexRM(pfx,modrm)),
16431 nameXMMReg(gregOfRexRM(pfx,modrm)));
16432 } else {
16433 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
16434 gen_SEGV_if_not_16_aligned( addr );
16435 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16436 d64 = (Long)getUChar(delta+alen);
16437 delta += alen+1;
16438 DIP("palignr $%lld,%s,%s\n", d64,
16439 dis_buf,
16440 nameXMMReg(gregOfRexRM(pfx,modrm)));
16443 IRTemp res = math_PALIGNR_XMM( sV, dV, d64 );
16444 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
16445 goto decode_success;
16447 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
16448 if (haveNo66noF2noF3(pfx) && sz == 4) {
16449 IRTemp sV = newTemp(Ity_I64);
16450 IRTemp dV = newTemp(Ity_I64);
16451 IRTemp res = newTemp(Ity_I64);
16453 modrm = getUChar(delta);
16454 do_MMX_preamble();
16455 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16457 if (epartIsReg(modrm)) {
16458 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16459 d64 = (Long)getUChar(delta+1);
16460 delta += 1+1;
16461 DIP("palignr $%lld,%s,%s\n", d64,
16462 nameMMXReg(eregLO3ofRM(modrm)),
16463 nameMMXReg(gregLO3ofRM(modrm)));
16464 } else {
16465 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
16466 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16467 d64 = (Long)getUChar(delta+alen);
16468 delta += alen+1;
16469             DIP("palignr $%lld,%s,%s\n", d64,
16470 dis_buf,
16471 nameMMXReg(gregLO3ofRM(modrm)));
16474 if (d64 == 0) {
16475 assign( res, mkexpr(sV) );
16477 else if (d64 >= 1 && d64 <= 7) {
16478 assign(res,
16479 binop(Iop_Or64,
16480 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)),
16481 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64))
16482 )));
16484 else if (d64 == 8) {
16485 assign( res, mkexpr(dV) );
16487 else if (d64 >= 9 && d64 <= 15) {
16488 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) );
16490 else if (d64 >= 16 && d64 <= 255) {
16491 assign( res, mkU64(0) );
16493 else
16494 vassert(0);
16496 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
16497 goto decode_success;
16499 break;
16501 default:
16502 break;
16506 //decode_failure:
16507 *decode_OK = False;
16508 return deltaIN;
16510 decode_success:
16511 *decode_OK = True;
16512 return delta;
16516 /*------------------------------------------------------------*/
16517 /*--- ---*/
16518 /*--- Top-level SSE4: dis_ESC_0F__SSE4 ---*/
16519 /*--- ---*/
16520 /*------------------------------------------------------------*/
16522 __attribute__((noinline))
16523 static
16524 Long dis_ESC_0F__SSE4 ( Bool* decode_OK,
16525 const VexArchInfo* archinfo,
16526 const VexAbiInfo* vbi,
16527 Prefix pfx, Int sz, Long deltaIN )
16529 IRTemp addr = IRTemp_INVALID;
16530 IRType ty = Ity_INVALID;
16531 UChar modrm = 0;
16532 Int alen = 0;
16533 HChar dis_buf[50];
16535 *decode_OK = False;
16537 Long delta = deltaIN;
16538 UChar opc = getUChar(delta);
16539 delta++;
16540 switch (opc) {
16542 case 0xB8:
16543 /* F3 0F B8 = POPCNT{W,L,Q}
16544         Count the number of 1 bits in a register */
16546 if (haveF3noF2(pfx) /* so both 66 and REX.W are possibilities */
16547 && (sz == 2 || sz == 4 || sz == 8)) {
16548 /*IRType*/ ty = szToITy(sz);
16549 IRTemp src = newTemp(ty);
16550 modrm = getUChar(delta);
16551 if (epartIsReg(modrm)) {
16552 assign(src, getIRegE(sz, pfx, modrm));
16553 delta += 1;
16554 DIP("popcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16555 nameIRegG(sz, pfx, modrm));
16556 } else {
16557 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16558 assign(src, loadLE(ty, mkexpr(addr)));
16559 delta += alen;
16560 DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf,
16561 nameIRegG(sz, pfx, modrm));
16564 IRTemp result = gen_POPCOUNT(ty, src);
16565 putIRegG(sz, pfx, modrm, mkexpr(result));
16567 // Update flags. This is pretty lame .. perhaps can do better
16568 // if this turns out to be performance critical.
16569 // O S A C P are cleared. Z is set if SRC == 0.
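          // Sketch of the thunk encoding relied on here (my reading of
          // the CC_OP_COPY convention): with OP_COPY the flags are read
          // straight out of CC_DEP1 at the AMD64G_CC_SHIFT_* positions,
          // so storing just (src == 0) << AMD64G_CC_SHIFT_Z sets Z as
          // required and leaves O/S/A/C/P at zero.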
16570 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16571 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16572 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16573 stmt( IRStmt_Put( OFFB_CC_DEP1,
16574 binop(Iop_Shl64,
16575 unop(Iop_1Uto64,
16576 binop(Iop_CmpEQ64,
16577 widenUto64(mkexpr(src)),
16578 mkU64(0))),
16579 mkU8(AMD64G_CC_SHIFT_Z))));
16581 goto decode_success;
16583 break;
16585 case 0xBC:
16586       /* F3 0F BC -- TZCNT (count trailing zeroes).  A BMI extension,
16587 which we can only decode if we're sure this is a BMI1 capable cpu
16588 that supports TZCNT, since otherwise it's BSF, which behaves
16589 differently on zero source. */
16590 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
16591 && (sz == 2 || sz == 4 || sz == 8)
16592 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI)) {
16593 /*IRType*/ ty = szToITy(sz);
16594 IRTemp src = newTemp(ty);
16595 modrm = getUChar(delta);
16596 if (epartIsReg(modrm)) {
16597 assign(src, getIRegE(sz, pfx, modrm));
16598 delta += 1;
16599 DIP("tzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16600 nameIRegG(sz, pfx, modrm));
16601 } else {
16602 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16603 assign(src, loadLE(ty, mkexpr(addr)));
16604 delta += alen;
16605 DIP("tzcnt%c %s, %s\n", nameISize(sz), dis_buf,
16606 nameIRegG(sz, pfx, modrm));
16609 IRTemp res = gen_TZCNT(ty, src);
16610 putIRegG(sz, pfx, modrm, mkexpr(res));
16612 // Update flags. This is pretty lame .. perhaps can do better
16613 // if this turns out to be performance critical.
16614 // O S A P are cleared. Z is set if RESULT == 0.
16615 // C is set if SRC is zero.
16616 IRTemp src64 = newTemp(Ity_I64);
16617 IRTemp res64 = newTemp(Ity_I64);
16618 assign(src64, widenUto64(mkexpr(src)));
16619 assign(res64, widenUto64(mkexpr(res)));
16621 IRTemp oszacp = newTemp(Ity_I64);
16622 assign(
16623 oszacp,
16624 binop(Iop_Or64,
16625 binop(Iop_Shl64,
16626 unop(Iop_1Uto64,
16627 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
16628 mkU8(AMD64G_CC_SHIFT_Z)),
16629 binop(Iop_Shl64,
16630 unop(Iop_1Uto64,
16631 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
16632 mkU8(AMD64G_CC_SHIFT_C))
16636 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16637 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16638 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16639 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
16641 goto decode_success;
16643 break;
16645 case 0xBD:
16646       /* F3 0F BD -- LZCNT (count leading zeroes).  An AMD extension,
16647 which we can only decode if we're sure this is an AMD cpu
16648 that supports LZCNT, since otherwise it's BSR, which behaves
16649 differently. Bizarrely, my Sandy Bridge also accepts these
16650 instructions but produces different results. */
16651 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
16652 && (sz == 2 || sz == 4 || sz == 8)
16653 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) {
16654 /*IRType*/ ty = szToITy(sz);
16655 IRTemp src = newTemp(ty);
16656 modrm = getUChar(delta);
16657 if (epartIsReg(modrm)) {
16658 assign(src, getIRegE(sz, pfx, modrm));
16659 delta += 1;
16660 DIP("lzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16661 nameIRegG(sz, pfx, modrm));
16662 } else {
16663 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16664 assign(src, loadLE(ty, mkexpr(addr)));
16665 delta += alen;
16666 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
16667 nameIRegG(sz, pfx, modrm));
16670 IRTemp res = gen_LZCNT(ty, src);
16671 putIRegG(sz, pfx, modrm, mkexpr(res));
16673 // Update flags. This is pretty lame .. perhaps can do better
16674 // if this turns out to be performance critical.
16675 // O S A P are cleared. Z is set if RESULT == 0.
16676 // C is set if SRC is zero.
16677 IRTemp src64 = newTemp(Ity_I64);
16678 IRTemp res64 = newTemp(Ity_I64);
16679 assign(src64, widenUto64(mkexpr(src)));
16680 assign(res64, widenUto64(mkexpr(res)));
16682 IRTemp oszacp = newTemp(Ity_I64);
16683 assign(
16684 oszacp,
16685 binop(Iop_Or64,
16686 binop(Iop_Shl64,
16687 unop(Iop_1Uto64,
16688 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
16689 mkU8(AMD64G_CC_SHIFT_Z)),
16690 binop(Iop_Shl64,
16691 unop(Iop_1Uto64,
16692 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
16693 mkU8(AMD64G_CC_SHIFT_C))
16697 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16698 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16699 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16700 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
16702 goto decode_success;
16704 break;
16706 default:
16707 break;
16711 //decode_failure:
16712 *decode_OK = False;
16713 return deltaIN;
16715 decode_success:
16716 *decode_OK = True;
16717 return delta;
16721 /*------------------------------------------------------------*/
16722 /*--- ---*/
16723 /*--- Top-level SSE4: dis_ESC_0F38__SSE4 ---*/
16724 /*--- ---*/
16725 /*------------------------------------------------------------*/
16727 static IRTemp math_PBLENDVB_128 ( IRTemp vecE, IRTemp vecG,
16728 IRTemp vec0/*controlling mask*/,
16729 UInt gran, IROp opSAR )
16731 /* The tricky bit is to convert vec0 into a suitable mask, by
16732 copying the most significant bit of each lane into all positions
16733 in the lane. */
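   /* Scalar sketch of the trick (assuming opSAR is the matching
      per-lane arithmetic shift, e.g. Iop_SarN8x16 for byte lanes):
      shifting each control lane right by (8*gran - 1) replicates its
      sign bit, giving all-ones where the lane's MSB is set and zero
      otherwise, so the blend reduces to
          res = (vecE & mask) | (vecG & ~mask);
      exactly as constructed below. */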
16734 IRTemp sh = newTemp(Ity_I8);
16735 assign(sh, mkU8(8 * gran - 1));
16737 IRTemp mask = newTemp(Ity_V128);
16738 assign(mask, binop(opSAR, mkexpr(vec0), mkexpr(sh)));
16740 IRTemp notmask = newTemp(Ity_V128);
16741 assign(notmask, unop(Iop_NotV128, mkexpr(mask)));
16743 IRTemp res = newTemp(Ity_V128);
16744 assign(res, binop(Iop_OrV128,
16745 binop(Iop_AndV128, mkexpr(vecE), mkexpr(mask)),
16746 binop(Iop_AndV128, mkexpr(vecG), mkexpr(notmask))));
16747 return res;
16750 static IRTemp math_PBLENDVB_256 ( IRTemp vecE, IRTemp vecG,
16751 IRTemp vec0/*controlling mask*/,
16752 UInt gran, IROp opSAR128 )
16754 /* The tricky bit is to convert vec0 into a suitable mask, by
16755 copying the most significant bit of each lane into all positions
16756 in the lane. */
16757 IRTemp sh = newTemp(Ity_I8);
16758 assign(sh, mkU8(8 * gran - 1));
16760 IRTemp vec0Hi = IRTemp_INVALID;
16761 IRTemp vec0Lo = IRTemp_INVALID;
16762 breakupV256toV128s( vec0, &vec0Hi, &vec0Lo );
16764 IRTemp mask = newTemp(Ity_V256);
16765 assign(mask, binop(Iop_V128HLtoV256,
16766 binop(opSAR128, mkexpr(vec0Hi), mkexpr(sh)),
16767 binop(opSAR128, mkexpr(vec0Lo), mkexpr(sh))));
16769 IRTemp notmask = newTemp(Ity_V256);
16770 assign(notmask, unop(Iop_NotV256, mkexpr(mask)));
16772 IRTemp res = newTemp(Ity_V256);
16773 assign(res, binop(Iop_OrV256,
16774 binop(Iop_AndV256, mkexpr(vecE), mkexpr(mask)),
16775 binop(Iop_AndV256, mkexpr(vecG), mkexpr(notmask))));
16776 return res;
16779 static Long dis_VBLENDV_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
16780 const HChar *name, UInt gran, IROp opSAR )
16782 IRTemp addr = IRTemp_INVALID;
16783 Int alen = 0;
16784 HChar dis_buf[50];
16785 UChar modrm = getUChar(delta);
16786 UInt rG = gregOfRexRM(pfx, modrm);
16787 UInt rV = getVexNvvvv(pfx);
16788 UInt rIS4 = 0xFF; /* invalid */
16789 IRTemp vecE = newTemp(Ity_V128);
16790 IRTemp vecV = newTemp(Ity_V128);
16791 IRTemp vecIS4 = newTemp(Ity_V128);
16792 if (epartIsReg(modrm)) {
16793 delta++;
16794 UInt rE = eregOfRexRM(pfx, modrm);
16795 assign(vecE, getXMMReg(rE));
16796 UChar ib = getUChar(delta);
16797 rIS4 = (ib >> 4) & 0xF;
16798 DIP("%s %s,%s,%s,%s\n",
16799 name, nameXMMReg(rIS4), nameXMMReg(rE),
16800 nameXMMReg(rV), nameXMMReg(rG));
16801 } else {
16802 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
16803 delta += alen;
16804 assign(vecE, loadLE(Ity_V128, mkexpr(addr)));
16805 UChar ib = getUChar(delta);
16806 rIS4 = (ib >> 4) & 0xF;
16807 DIP("%s %s,%s,%s,%s\n",
16808 name, nameXMMReg(rIS4), dis_buf, nameXMMReg(rV), nameXMMReg(rG));
16810 delta++;
16811 assign(vecV, getXMMReg(rV));
16812 assign(vecIS4, getXMMReg(rIS4));
16813 IRTemp res = math_PBLENDVB_128( vecE, vecV, vecIS4, gran, opSAR );
16814 putYMMRegLoAndZU( rG, mkexpr(res) );
16815 return delta;
16818 static Long dis_VBLENDV_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
16819 const HChar *name, UInt gran, IROp opSAR128 )
16821 IRTemp addr = IRTemp_INVALID;
16822 Int alen = 0;
16823 HChar dis_buf[50];
16824 UChar modrm = getUChar(delta);
16825 UInt rG = gregOfRexRM(pfx, modrm);
16826 UInt rV = getVexNvvvv(pfx);
16827 UInt rIS4 = 0xFF; /* invalid */
16828 IRTemp vecE = newTemp(Ity_V256);
16829 IRTemp vecV = newTemp(Ity_V256);
16830 IRTemp vecIS4 = newTemp(Ity_V256);
16831 if (epartIsReg(modrm)) {
16832 delta++;
16833 UInt rE = eregOfRexRM(pfx, modrm);
16834 assign(vecE, getYMMReg(rE));
16835 UChar ib = getUChar(delta);
16836 rIS4 = (ib >> 4) & 0xF;
16837 DIP("%s %s,%s,%s,%s\n",
16838 name, nameYMMReg(rIS4), nameYMMReg(rE),
16839 nameYMMReg(rV), nameYMMReg(rG));
16840 } else {
16841 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
16842 delta += alen;
16843 assign(vecE, loadLE(Ity_V256, mkexpr(addr)));
16844 UChar ib = getUChar(delta);
16845 rIS4 = (ib >> 4) & 0xF;
16846 DIP("%s %s,%s,%s,%s\n",
16847 name, nameYMMReg(rIS4), dis_buf, nameYMMReg(rV), nameYMMReg(rG));
16849 delta++;
16850 assign(vecV, getYMMReg(rV));
16851 assign(vecIS4, getYMMReg(rIS4));
16852 IRTemp res = math_PBLENDVB_256( vecE, vecV, vecIS4, gran, opSAR128 );
16853 putYMMReg( rG, mkexpr(res) );
16854 return delta;
16857 static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign )
16859 /* Set Z=1 iff (vecE & vecG) == 0
16860       Set C=1 iff (vecE & not vecG) == 0  */
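   /* sign == 0 tests all 128 bits (PTEST); sign == 32 or 64 restricts
      the test to the per-lane sign bits, as VTESTPS and VTESTPD
      respectively require. */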
16863 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16865    /* Reduce andV (resp. andnV) to a 64-bit value by or-ing its top
16866       and bottom 64-bit halves together.  This relies on the trick:
16868 InterleaveLO64x2([a,b],[c,d]) == [b,d] hence
16870 InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly
16871 InterleaveHI64x2([a,b],[a,b]) == [a,a]
16873 and so the OR of the above 2 exprs produces
16874       [a OR b, a OR b], from which we simply take the lower half.  */
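   /* Hence and64 is zero exactly when the whole of andV is zero, and
      likewise andn64/andnV -- which is all that the Z and C computations
      below need. */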
16876 IRTemp and64 = newTemp(Ity_I64);
16877 IRTemp andn64 = newTemp(Ity_I64);
16879 assign(and64,
16880 unop(Iop_V128to64,
16881 binop(Iop_OrV128,
16882 binop(Iop_InterleaveLO64x2,
16883 mkexpr(andV), mkexpr(andV)),
16884 binop(Iop_InterleaveHI64x2,
16885 mkexpr(andV), mkexpr(andV)))));
16887 assign(andn64,
16888 unop(Iop_V128to64,
16889 binop(Iop_OrV128,
16890 binop(Iop_InterleaveLO64x2,
16891 mkexpr(andnV), mkexpr(andnV)),
16892 binop(Iop_InterleaveHI64x2,
16893 mkexpr(andnV), mkexpr(andnV)))));
16895 IRTemp z64 = newTemp(Ity_I64);
16896 IRTemp c64 = newTemp(Ity_I64);
16897 if (sign == 64) {
16898       /* When only interested in the most significant bit, just shift
16899          arithmetically right and complement (Not64). */
16900 assign(z64,
16901 unop(Iop_Not64,
16902 binop(Iop_Sar64, mkexpr(and64), mkU8(63))));
16904 assign(c64,
16905 unop(Iop_Not64,
16906 binop(Iop_Sar64, mkexpr(andn64), mkU8(63))));
16907 } else {
16908 if (sign == 32) {
16909       /* When interested in bit 31 and bit 63, mask those bits and
16910          fall through into the PTEST handling.  (After the two 64-bit
               halves of the V128 were or-ed together above, the sign bits of
               all four 32-bit lanes end up or-ed into bits 31 and 63.) */
16911 IRTemp t0 = newTemp(Ity_I64);
16912 IRTemp t1 = newTemp(Ity_I64);
16913 IRTemp t2 = newTemp(Ity_I64);
16914 assign(t0, mkU64(0x8000000080000000ULL));
16915 assign(t1, binop(Iop_And64, mkexpr(and64), mkexpr(t0)));
16916 assign(t2, binop(Iop_And64, mkexpr(andn64), mkexpr(t0)));
16917 and64 = t1;
16918 andn64 = t2;
16920 /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
16921 slice out the Z and C bits conveniently. We use the standard
16922 trick all-zeroes -> all-zeroes, anything-else -> all-ones
16923          done by "(x | -x) >>s (word-size - 1)".  */
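      /* A small illustration, using 8-bit values for brevity: x == 0 gives
         (0 | -0) >>s 7 == 0, whereas x == 0x04 gives
         (0x04 | -0x04) == (0x04 | 0xFC) == 0xFC, and 0xFC >>s 7 == 0xFF.
         The Not64s below then flip that into the required "is zero"
         indication. */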
16925 assign(z64,
16926 unop(Iop_Not64,
16927 binop(Iop_Sar64,
16928 binop(Iop_Or64,
16929 binop(Iop_Sub64, mkU64(0), mkexpr(and64)),
16930 mkexpr(and64)), mkU8(63))));
16932 assign(c64,
16933 unop(Iop_Not64,
16934 binop(Iop_Sar64,
16935 binop(Iop_Or64,
16936 binop(Iop_Sub64, mkU64(0), mkexpr(andn64)),
16937 mkexpr(andn64)), mkU8(63))));
16940 /* And finally, slice out the Z and C flags and set the flags
16941 thunk to COPY for them. OSAP are set to zero. */
16942 IRTemp newOSZACP = newTemp(Ity_I64);
16943 assign(newOSZACP,
16944 binop(Iop_Or64,
16945 binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)),
16946 binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C))));
16948 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP)));
16949 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16950 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16951 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16955 /* Handles 128 bit versions of PTEST, VTESTPS or VTESTPD.
16956 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
16957 static Long dis_xTESTy_128 ( const VexAbiInfo* vbi, Prefix pfx,
16958 Long delta, Bool isAvx, Int sign )
16960 IRTemp addr = IRTemp_INVALID;
16961 Int alen = 0;
16962 HChar dis_buf[50];
16963 UChar modrm = getUChar(delta);
16964 UInt rG = gregOfRexRM(pfx, modrm);
16965 IRTemp vecE = newTemp(Ity_V128);
16966 IRTemp vecG = newTemp(Ity_V128);
16968 if ( epartIsReg(modrm) ) {
16969 UInt rE = eregOfRexRM(pfx, modrm);
16970 assign(vecE, getXMMReg(rE));
16971 delta += 1;
16972 DIP( "%s%stest%s %s,%s\n",
16973 isAvx ? "v" : "", sign == 0 ? "p" : "",
16974 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16975 nameXMMReg(rE), nameXMMReg(rG) );
16976 } else {
16977 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16978 if (!isAvx)
16979 gen_SEGV_if_not_16_aligned( addr );
16980 assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
16981 delta += alen;
16982 DIP( "%s%stest%s %s,%s\n",
16983 isAvx ? "v" : "", sign == 0 ? "p" : "",
16984 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16985 dis_buf, nameXMMReg(rG) );
16988 assign(vecG, getXMMReg(rG));
16990 /* Set Z=1 iff (vecE & vecG) == 0
16991       Set C=1 iff (vecE & not vecG) == 0  */
16994 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16995 IRTemp andV = newTemp(Ity_V128);
16996 IRTemp andnV = newTemp(Ity_V128);
16997 assign(andV, binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
16998 assign(andnV, binop(Iop_AndV128,
16999 mkexpr(vecE),
17000 binop(Iop_XorV128, mkexpr(vecG),
17001 mkV128(0xFFFF))));
17003 finish_xTESTy ( andV, andnV, sign );
17004 return delta;
17008 /* Handles 256 bit versions of PTEST, VTESTPS or VTESTPD.
17009 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
17010 static Long dis_xTESTy_256 ( const VexAbiInfo* vbi, Prefix pfx,
17011 Long delta, Int sign )
17013 IRTemp addr = IRTemp_INVALID;
17014 Int alen = 0;
17015 HChar dis_buf[50];
17016 UChar modrm = getUChar(delta);
17017 UInt rG = gregOfRexRM(pfx, modrm);
17018 IRTemp vecE = newTemp(Ity_V256);
17019 IRTemp vecG = newTemp(Ity_V256);
17021 if ( epartIsReg(modrm) ) {
17022 UInt rE = eregOfRexRM(pfx, modrm);
17023 assign(vecE, getYMMReg(rE));
17024 delta += 1;
17025 DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
17026 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
17027 nameYMMReg(rE), nameYMMReg(rG) );
17028 } else {
17029 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17030 assign(vecE, loadLE( Ity_V256, mkexpr(addr) ));
17031 delta += alen;
17032 DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
17033 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
17034 dis_buf, nameYMMReg(rG) );
17037 assign(vecG, getYMMReg(rG));
17039 /* Set Z=1 iff (vecE & vecG) == 0
17040       Set C=1 iff (vecE & not vecG) == 0  */
17043 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
17044 IRTemp andV = newTemp(Ity_V256);
17045 IRTemp andnV = newTemp(Ity_V256);
17046 assign(andV, binop(Iop_AndV256, mkexpr(vecE), mkexpr(vecG)));
17047 assign(andnV, binop(Iop_AndV256,
17048 mkexpr(vecE), unop(Iop_NotV256, mkexpr(vecG))));
17050 IRTemp andVhi = IRTemp_INVALID;
17051 IRTemp andVlo = IRTemp_INVALID;
17052 IRTemp andnVhi = IRTemp_INVALID;
17053 IRTemp andnVlo = IRTemp_INVALID;
17054 breakupV256toV128s( andV, &andVhi, &andVlo );
17055 breakupV256toV128s( andnV, &andnVhi, &andnVlo );
17057 IRTemp andV128 = newTemp(Ity_V128);
17058 IRTemp andnV128 = newTemp(Ity_V128);
17059 assign( andV128, binop( Iop_OrV128, mkexpr(andVhi), mkexpr(andVlo) ) );
17060 assign( andnV128, binop( Iop_OrV128, mkexpr(andnVhi), mkexpr(andnVlo) ) );
17062 finish_xTESTy ( andV128, andnV128, sign );
17063 return delta;
17067 /* Handles 128 bit versions of PMOVZXBW and PMOVSXBW. */
17068 static Long dis_PMOVxXBW_128 ( const VexAbiInfo* vbi, Prefix pfx,
17069 Long delta, Bool isAvx, Bool xIsZ )
17071 IRTemp addr = IRTemp_INVALID;
17072 Int alen = 0;
17073 HChar dis_buf[50];
17074 IRTemp srcVec = newTemp(Ity_V128);
17075 UChar modrm = getUChar(delta);
17076 const HChar* mbV = isAvx ? "v" : "";
17077 const HChar how = xIsZ ? 'z' : 's';
17078 UInt rG = gregOfRexRM(pfx, modrm);
17079 if ( epartIsReg(modrm) ) {
17080 UInt rE = eregOfRexRM(pfx, modrm);
17081 assign( srcVec, getXMMReg(rE) );
17082 delta += 1;
17083 DIP( "%spmov%cxbw %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17084 } else {
17085 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17086 assign( srcVec,
17087 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17088 delta += alen;
17089 DIP( "%spmov%cxbw %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
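   /* Interleaving with an all-zeroes vector puts a zero byte above each
      of the low eight source bytes, i.e. zero-extends each to 16 bits;
      for the signed case the Shl/Sar-by-8 pair then replaces those zero
      bytes with copies of each byte's sign bit. */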
17092 IRExpr* res
17093 = xIsZ /* do math for either zero or sign extend */
17094 ? binop( Iop_InterleaveLO8x16,
17095 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
17096 : binop( Iop_SarN16x8,
17097 binop( Iop_ShlN16x8,
17098 binop( Iop_InterleaveLO8x16,
17099 IRExpr_Const( IRConst_V128(0) ),
17100 mkexpr(srcVec) ),
17101 mkU8(8) ),
17102 mkU8(8) );
17104 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
17106 return delta;
17110 /* Handles 256 bit versions of PMOVZXBW and PMOVSXBW. */
17111 static Long dis_PMOVxXBW_256 ( const VexAbiInfo* vbi, Prefix pfx,
17112 Long delta, Bool xIsZ )
17114 IRTemp addr = IRTemp_INVALID;
17115 Int alen = 0;
17116 HChar dis_buf[50];
17117 IRTemp srcVec = newTemp(Ity_V128);
17118 UChar modrm = getUChar(delta);
17119 UChar how = xIsZ ? 'z' : 's';
17120 UInt rG = gregOfRexRM(pfx, modrm);
17121 if ( epartIsReg(modrm) ) {
17122 UInt rE = eregOfRexRM(pfx, modrm);
17123 assign( srcVec, getXMMReg(rE) );
17124 delta += 1;
17125 DIP( "vpmov%cxbw %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17126 } else {
17127 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17128 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) );
17129 delta += alen;
17130 DIP( "vpmov%cxbw %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17133 /* First do zero extend. */
17134 IRExpr* res
17135 = binop( Iop_V128HLtoV256,
17136 binop( Iop_InterleaveHI8x16,
17137 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
17138 binop( Iop_InterleaveLO8x16,
17139 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
17140 /* And if needed sign extension as well. */
17141 if (!xIsZ)
17142 res = binop( Iop_SarN16x16,
17143 binop( Iop_ShlN16x16, res, mkU8(8) ), mkU8(8) );
17145 putYMMReg ( rG, res );
17147 return delta;
17151 static Long dis_PMOVxXWD_128 ( const VexAbiInfo* vbi, Prefix pfx,
17152 Long delta, Bool isAvx, Bool xIsZ )
17154 IRTemp addr = IRTemp_INVALID;
17155 Int alen = 0;
17156 HChar dis_buf[50];
17157 IRTemp srcVec = newTemp(Ity_V128);
17158 UChar modrm = getUChar(delta);
17159 const HChar* mbV = isAvx ? "v" : "";
17160 const HChar how = xIsZ ? 'z' : 's';
17161 UInt rG = gregOfRexRM(pfx, modrm);
17163 if ( epartIsReg(modrm) ) {
17164 UInt rE = eregOfRexRM(pfx, modrm);
17165 assign( srcVec, getXMMReg(rE) );
17166 delta += 1;
17167 DIP( "%spmov%cxwd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17168 } else {
17169 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17170 assign( srcVec,
17171 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17172 delta += alen;
17173 DIP( "%spmov%cxwd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
17176 IRExpr* res
17177 = binop( Iop_InterleaveLO16x8,
17178 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) );
17179 if (!xIsZ)
17180 res = binop(Iop_SarN32x4,
17181 binop(Iop_ShlN32x4, res, mkU8(16)), mkU8(16));
17183 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17184 ( gregOfRexRM(pfx, modrm), res );
17186 return delta;
17190 static Long dis_PMOVxXWD_256 ( const VexAbiInfo* vbi, Prefix pfx,
17191 Long delta, Bool xIsZ )
17193 IRTemp addr = IRTemp_INVALID;
17194 Int alen = 0;
17195 HChar dis_buf[50];
17196 IRTemp srcVec = newTemp(Ity_V128);
17197 UChar modrm = getUChar(delta);
17198 UChar how = xIsZ ? 'z' : 's';
17199 UInt rG = gregOfRexRM(pfx, modrm);
17201 if ( epartIsReg(modrm) ) {
17202 UInt rE = eregOfRexRM(pfx, modrm);
17203 assign( srcVec, getXMMReg(rE) );
17204 delta += 1;
17205 DIP( "vpmov%cxwd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17206 } else {
17207 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17208 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) );
17209 delta += alen;
17210 DIP( "vpmov%cxwd %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17213 IRExpr* res
17214 = binop( Iop_V128HLtoV256,
17215 binop( Iop_InterleaveHI16x8,
17216 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
17217 binop( Iop_InterleaveLO16x8,
17218 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
17219 if (!xIsZ)
17220 res = binop(Iop_SarN32x8,
17221 binop(Iop_ShlN32x8, res, mkU8(16)), mkU8(16));
17223 putYMMReg ( rG, res );
17225 return delta;
17229 static Long dis_PMOVSXWQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17230 Long delta, Bool isAvx )
17232 IRTemp addr = IRTemp_INVALID;
17233 Int alen = 0;
17234 HChar dis_buf[50];
17235 IRTemp srcBytes = newTemp(Ity_I32);
17236 UChar modrm = getUChar(delta);
17237 const HChar* mbV = isAvx ? "v" : "";
17238 UInt rG = gregOfRexRM(pfx, modrm);
17240 if ( epartIsReg( modrm ) ) {
17241 UInt rE = eregOfRexRM(pfx, modrm);
17242 assign( srcBytes, getXMMRegLane32( rE, 0 ) );
17243 delta += 1;
17244 DIP( "%spmovsxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17245 } else {
17246 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17247 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
17248 delta += alen;
17249 DIP( "%spmovsxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17252 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17253 ( rG, binop( Iop_64HLtoV128,
17254 unop( Iop_16Sto64,
17255 unop( Iop_32HIto16, mkexpr(srcBytes) ) ),
17256 unop( Iop_16Sto64,
17257 unop( Iop_32to16, mkexpr(srcBytes) ) ) ) );
17258 return delta;
17262 static Long dis_PMOVSXWQ_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
17264 IRTemp addr = IRTemp_INVALID;
17265 Int alen = 0;
17266 HChar dis_buf[50];
17267 IRTemp srcBytes = newTemp(Ity_I64);
17268 UChar modrm = getUChar(delta);
17269 UInt rG = gregOfRexRM(pfx, modrm);
17270 IRTemp s3, s2, s1, s0;
17271 s3 = s2 = s1 = s0 = IRTemp_INVALID;
17273 if ( epartIsReg( modrm ) ) {
17274 UInt rE = eregOfRexRM(pfx, modrm);
17275 assign( srcBytes, getXMMRegLane64( rE, 0 ) );
17276 delta += 1;
17277 DIP( "vpmovsxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17278 } else {
17279 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17280 assign( srcBytes, loadLE( Ity_I64, mkexpr(addr) ) );
17281 delta += alen;
17282 DIP( "vpmovsxwq %s,%s\n", dis_buf, nameYMMReg(rG) );
17285 breakup64to16s( srcBytes, &s3, &s2, &s1, &s0 );
17286 putYMMReg( rG, binop( Iop_V128HLtoV256,
17287 binop( Iop_64HLtoV128,
17288 unop( Iop_16Sto64, mkexpr(s3) ),
17289 unop( Iop_16Sto64, mkexpr(s2) ) ),
17290 binop( Iop_64HLtoV128,
17291 unop( Iop_16Sto64, mkexpr(s1) ),
17292 unop( Iop_16Sto64, mkexpr(s0) ) ) ) );
17293 return delta;
17297 static Long dis_PMOVZXWQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17298 Long delta, Bool isAvx )
17300 IRTemp addr = IRTemp_INVALID;
17301 Int alen = 0;
17302 HChar dis_buf[50];
17303 IRTemp srcVec = newTemp(Ity_V128);
17304 UChar modrm = getUChar(delta);
17305 const HChar* mbV = isAvx ? "v" : "";
17306 UInt rG = gregOfRexRM(pfx, modrm);
17308 if ( epartIsReg( modrm ) ) {
17309 UInt rE = eregOfRexRM(pfx, modrm);
17310 assign( srcVec, getXMMReg(rE) );
17311 delta += 1;
17312 DIP( "%spmovzxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17313 } else {
17314 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17315 assign( srcVec,
17316 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
17317 delta += alen;
17318 DIP( "%spmovzxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17321 IRTemp zeroVec = newTemp( Ity_V128 );
17322 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17324 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17325 ( rG, binop( Iop_InterleaveLO16x8,
17326 mkexpr(zeroVec),
17327 binop( Iop_InterleaveLO16x8,
17328 mkexpr(zeroVec), mkexpr(srcVec) ) ) );
17329 return delta;
17333 static Long dis_PMOVZXWQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17334 Long delta )
17336 IRTemp addr = IRTemp_INVALID;
17337 Int alen = 0;
17338 HChar dis_buf[50];
17339 IRTemp srcVec = newTemp(Ity_V128);
17340 UChar modrm = getUChar(delta);
17341 UInt rG = gregOfRexRM(pfx, modrm);
17343 if ( epartIsReg( modrm ) ) {
17344 UInt rE = eregOfRexRM(pfx, modrm);
17345 assign( srcVec, getXMMReg(rE) );
17346 delta += 1;
17347 DIP( "vpmovzxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17348 } else {
17349 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17350 assign( srcVec,
17351 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17352 delta += alen;
17353 DIP( "vpmovzxwq %s,%s\n", dis_buf, nameYMMReg(rG) );
17356 IRTemp zeroVec = newTemp( Ity_V128 );
17357 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17359 putYMMReg( rG, binop( Iop_V128HLtoV256,
17360 binop( Iop_InterleaveHI16x8,
17361 mkexpr(zeroVec),
17362 binop( Iop_InterleaveLO16x8,
17363 mkexpr(zeroVec), mkexpr(srcVec) ) ),
17364 binop( Iop_InterleaveLO16x8,
17365 mkexpr(zeroVec),
17366 binop( Iop_InterleaveLO16x8,
17367 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
17368 return delta;
17372 /* Handles 128 bit versions of PMOVZXDQ and PMOVSXDQ. */
17373 static Long dis_PMOVxXDQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17374 Long delta, Bool isAvx, Bool xIsZ )
17376 IRTemp addr = IRTemp_INVALID;
17377 Int alen = 0;
17378 HChar dis_buf[50];
17379 IRTemp srcI64 = newTemp(Ity_I64);
17380 IRTemp srcVec = newTemp(Ity_V128);
17381 UChar modrm = getUChar(delta);
17382 const HChar* mbV = isAvx ? "v" : "";
17383 const HChar how = xIsZ ? 'z' : 's';
17384 UInt rG = gregOfRexRM(pfx, modrm);
17385 /* Compute both srcI64 -- the value to expand -- and srcVec -- same
17386 thing in a V128, with arbitrary junk in the top 64 bits. Use
17387 one or both of them and let iropt clean up afterwards (as
17388 usual). */
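   /* srcVec feeds the zero-extend case below (a vector interleave),
      whilst srcI64 feeds the sign-extend case (two scalar 32-to-64
      extensions); whichever one goes unused is discarded by iropt. */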
17389 if ( epartIsReg(modrm) ) {
17390 UInt rE = eregOfRexRM(pfx, modrm);
17391 assign( srcVec, getXMMReg(rE) );
17392 assign( srcI64, unop(Iop_V128to64, mkexpr(srcVec)) );
17393 delta += 1;
17394 DIP( "%spmov%cxdq %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17395 } else {
17396 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17397 assign( srcI64, loadLE(Ity_I64, mkexpr(addr)) );
17398 assign( srcVec, unop( Iop_64UtoV128, mkexpr(srcI64)) );
17399 delta += alen;
17400 DIP( "%spmov%cxdq %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
17403 IRExpr* res
17404 = xIsZ /* do math for either zero or sign extend */
17405 ? binop( Iop_InterleaveLO32x4,
17406 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
17407 : binop( Iop_64HLtoV128,
17408 unop( Iop_32Sto64,
17409 unop( Iop_64HIto32, mkexpr(srcI64) ) ),
17410 unop( Iop_32Sto64,
17411 unop( Iop_64to32, mkexpr(srcI64) ) ) );
17413 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
17415 return delta;
17419 /* Handles 256 bit versions of PMOVZXDQ and PMOVSXDQ. */
17420 static Long dis_PMOVxXDQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17421 Long delta, Bool xIsZ )
17423 IRTemp addr = IRTemp_INVALID;
17424 Int alen = 0;
17425 HChar dis_buf[50];
17426 IRTemp srcVec = newTemp(Ity_V128);
17427 UChar modrm = getUChar(delta);
17428 UChar how = xIsZ ? 'z' : 's';
17429 UInt rG = gregOfRexRM(pfx, modrm);
17430    /* Compute srcVec, the value to expand.  Unlike the 128-bit case
17431       there is no separate srcI64 here: the full 128 bits of the source
17432       are consumed, either by the interleaves (zero extend) or by the
17433       32-to-64 lane extensions (sign extend) below. */
17434 if ( epartIsReg(modrm) ) {
17435 UInt rE = eregOfRexRM(pfx, modrm);
17436 assign( srcVec, getXMMReg(rE) );
17437 delta += 1;
17438 DIP( "vpmov%cxdq %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17439 } else {
17440 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17441 assign( srcVec, loadLE(Ity_V128, mkexpr(addr)) );
17442 delta += alen;
17443 DIP( "vpmov%cxdq %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17446 IRExpr* res;
17447 if (xIsZ)
17448 res = binop( Iop_V128HLtoV256,
17449 binop( Iop_InterleaveHI32x4,
17450 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
17451 binop( Iop_InterleaveLO32x4,
17452 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
17453 else {
17454 IRTemp s3, s2, s1, s0;
17455 s3 = s2 = s1 = s0 = IRTemp_INVALID;
17456 breakupV128to32s( srcVec, &s3, &s2, &s1, &s0 );
17457 res = binop( Iop_V128HLtoV256,
17458 binop( Iop_64HLtoV128,
17459 unop( Iop_32Sto64, mkexpr(s3) ),
17460 unop( Iop_32Sto64, mkexpr(s2) ) ),
17461 binop( Iop_64HLtoV128,
17462 unop( Iop_32Sto64, mkexpr(s1) ),
17463 unop( Iop_32Sto64, mkexpr(s0) ) ) );
17466 putYMMReg ( rG, res );
17468 return delta;
17472 /* Handles 128 bit versions of PMOVZXBD and PMOVSXBD. */
17473 static Long dis_PMOVxXBD_128 ( const VexAbiInfo* vbi, Prefix pfx,
17474 Long delta, Bool isAvx, Bool xIsZ )
17476 IRTemp addr = IRTemp_INVALID;
17477 Int alen = 0;
17478 HChar dis_buf[50];
17479 IRTemp srcVec = newTemp(Ity_V128);
17480 UChar modrm = getUChar(delta);
17481 const HChar* mbV = isAvx ? "v" : "";
17482 const HChar how = xIsZ ? 'z' : 's';
17483 UInt rG = gregOfRexRM(pfx, modrm);
17484 if ( epartIsReg(modrm) ) {
17485 UInt rE = eregOfRexRM(pfx, modrm);
17486 assign( srcVec, getXMMReg(rE) );
17487 delta += 1;
17488 DIP( "%spmov%cxbd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17489 } else {
17490 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17491 assign( srcVec,
17492 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
17493 delta += alen;
17494 DIP( "%spmov%cxbd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
17497 IRTemp zeroVec = newTemp(Ity_V128);
17498 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17500 IRExpr* res
17501 = binop(Iop_InterleaveLO8x16,
17502 mkexpr(zeroVec),
17503 binop(Iop_InterleaveLO8x16,
17504 mkexpr(zeroVec), mkexpr(srcVec)));
17505 if (!xIsZ)
17506 res = binop(Iop_SarN32x4,
17507 binop(Iop_ShlN32x4, res, mkU8(24)), mkU8(24));
17509 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
17511 return delta;
17515 /* Handles 256 bit versions of PMOVZXBD and PMOVSXBD. */
17516 static Long dis_PMOVxXBD_256 ( const VexAbiInfo* vbi, Prefix pfx,
17517 Long delta, Bool xIsZ )
17519 IRTemp addr = IRTemp_INVALID;
17520 Int alen = 0;
17521 HChar dis_buf[50];
17522 IRTemp srcVec = newTemp(Ity_V128);
17523 UChar modrm = getUChar(delta);
17524 UChar how = xIsZ ? 'z' : 's';
17525 UInt rG = gregOfRexRM(pfx, modrm);
17526 if ( epartIsReg(modrm) ) {
17527 UInt rE = eregOfRexRM(pfx, modrm);
17528 assign( srcVec, getXMMReg(rE) );
17529 delta += 1;
17530 DIP( "vpmov%cxbd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17531 } else {
17532 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17533 assign( srcVec,
17534 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17535 delta += alen;
17536 DIP( "vpmov%cxbd %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17539 IRTemp zeroVec = newTemp(Ity_V128);
17540 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17542 IRExpr* res
17543 = binop( Iop_V128HLtoV256,
17544 binop(Iop_InterleaveHI8x16,
17545 mkexpr(zeroVec),
17546 binop(Iop_InterleaveLO8x16,
17547 mkexpr(zeroVec), mkexpr(srcVec)) ),
17548 binop(Iop_InterleaveLO8x16,
17549 mkexpr(zeroVec),
17550 binop(Iop_InterleaveLO8x16,
17551 mkexpr(zeroVec), mkexpr(srcVec)) ) );
17552 if (!xIsZ)
17553 res = binop(Iop_SarN32x8,
17554 binop(Iop_ShlN32x8, res, mkU8(24)), mkU8(24));
17556 putYMMReg ( rG, res );
17558 return delta;
17562 /* Handles 128 bit versions of PMOVSXBQ. */
17563 static Long dis_PMOVSXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17564 Long delta, Bool isAvx )
17566 IRTemp addr = IRTemp_INVALID;
17567 Int alen = 0;
17568 HChar dis_buf[50];
17569 IRTemp srcBytes = newTemp(Ity_I16);
17570 UChar modrm = getUChar(delta);
17571 const HChar* mbV = isAvx ? "v" : "";
17572 UInt rG = gregOfRexRM(pfx, modrm);
17573 if ( epartIsReg(modrm) ) {
17574 UInt rE = eregOfRexRM(pfx, modrm);
17575 assign( srcBytes, getXMMRegLane16( rE, 0 ) );
17576 delta += 1;
17577 DIP( "%spmovsxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17578 } else {
17579 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17580 assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) );
17581 delta += alen;
17582 DIP( "%spmovsxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17585 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17586 ( rG, binop( Iop_64HLtoV128,
17587 unop( Iop_8Sto64,
17588 unop( Iop_16HIto8, mkexpr(srcBytes) ) ),
17589 unop( Iop_8Sto64,
17590 unop( Iop_16to8, mkexpr(srcBytes) ) ) ) );
17591 return delta;
17595 /* Handles 256 bit versions of PMOVSXBQ. */
17596 static Long dis_PMOVSXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17597 Long delta )
17599 IRTemp addr = IRTemp_INVALID;
17600 Int alen = 0;
17601 HChar dis_buf[50];
17602 IRTemp srcBytes = newTemp(Ity_I32);
17603 UChar modrm = getUChar(delta);
17604 UInt rG = gregOfRexRM(pfx, modrm);
17605 if ( epartIsReg(modrm) ) {
17606 UInt rE = eregOfRexRM(pfx, modrm);
17607 assign( srcBytes, getXMMRegLane32( rE, 0 ) );
17608 delta += 1;
17609 DIP( "vpmovsxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17610 } else {
17611 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17612 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
17613 delta += alen;
17614 DIP( "vpmovsxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
17617 putYMMReg
17618 ( rG, binop( Iop_V128HLtoV256,
17619 binop( Iop_64HLtoV128,
17620 unop( Iop_8Sto64,
17621 unop( Iop_16HIto8,
17622 unop( Iop_32HIto16,
17623 mkexpr(srcBytes) ) ) ),
17624 unop( Iop_8Sto64,
17625 unop( Iop_16to8,
17626 unop( Iop_32HIto16,
17627 mkexpr(srcBytes) ) ) ) ),
17628 binop( Iop_64HLtoV128,
17629 unop( Iop_8Sto64,
17630 unop( Iop_16HIto8,
17631 unop( Iop_32to16,
17632 mkexpr(srcBytes) ) ) ),
17633 unop( Iop_8Sto64,
17634 unop( Iop_16to8,
17635 unop( Iop_32to16,
17636 mkexpr(srcBytes) ) ) ) ) ) );
17637 return delta;
17641 /* Handles 128 bit versions of PMOVZXBQ. */
17642 static Long dis_PMOVZXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17643 Long delta, Bool isAvx )
17645 IRTemp addr = IRTemp_INVALID;
17646 Int alen = 0;
17647 HChar dis_buf[50];
17648 IRTemp srcVec = newTemp(Ity_V128);
17649 UChar modrm = getUChar(delta);
17650 const HChar* mbV = isAvx ? "v" : "";
17651 UInt rG = gregOfRexRM(pfx, modrm);
17652 if ( epartIsReg(modrm) ) {
17653 UInt rE = eregOfRexRM(pfx, modrm);
17654 assign( srcVec, getXMMReg(rE) );
17655 delta += 1;
17656 DIP( "%spmovzxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17657 } else {
17658 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17659 assign( srcVec,
17660 unop( Iop_32UtoV128,
17661 unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) ))));
17662 delta += alen;
17663 DIP( "%spmovzxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17666 IRTemp zeroVec = newTemp(Ity_V128);
17667 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17669 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17670 ( rG, binop( Iop_InterleaveLO8x16,
17671 mkexpr(zeroVec),
17672 binop( Iop_InterleaveLO8x16,
17673 mkexpr(zeroVec),
17674 binop( Iop_InterleaveLO8x16,
17675 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
17676 return delta;
17680 /* Handles 256 bit versions of PMOVZXBQ. */
17681 static Long dis_PMOVZXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17682 Long delta )
17684 IRTemp addr = IRTemp_INVALID;
17685 Int alen = 0;
17686 HChar dis_buf[50];
17687 IRTemp srcVec = newTemp(Ity_V128);
17688 UChar modrm = getUChar(delta);
17689 UInt rG = gregOfRexRM(pfx, modrm);
17690 if ( epartIsReg(modrm) ) {
17691 UInt rE = eregOfRexRM(pfx, modrm);
17692 assign( srcVec, getXMMReg(rE) );
17693 delta += 1;
17694 DIP( "vpmovzxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17695 } else {
17696 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17697 assign( srcVec,
17698 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) )));
17699 delta += alen;
17700 DIP( "vpmovzxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
17703 IRTemp zeroVec = newTemp(Ity_V128);
17704 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17706 putYMMReg
17707 ( rG, binop( Iop_V128HLtoV256,
17708 binop( Iop_InterleaveHI8x16,
17709 mkexpr(zeroVec),
17710 binop( Iop_InterleaveLO8x16,
17711 mkexpr(zeroVec),
17712 binop( Iop_InterleaveLO8x16,
17713 mkexpr(zeroVec), mkexpr(srcVec) ) ) ),
17714 binop( Iop_InterleaveLO8x16,
17715 mkexpr(zeroVec),
17716 binop( Iop_InterleaveLO8x16,
17717 mkexpr(zeroVec),
17718 binop( Iop_InterleaveLO8x16,
17719 mkexpr(zeroVec), mkexpr(srcVec) ) ) )
17720 ) );
17721 return delta;
17725 static Long dis_PHMINPOSUW_128 ( const VexAbiInfo* vbi, Prefix pfx,
17726 Long delta, Bool isAvx )
17728 IRTemp addr = IRTemp_INVALID;
17729 Int alen = 0;
17730 HChar dis_buf[50];
17731 UChar modrm = getUChar(delta);
17732 const HChar* mbV = isAvx ? "v" : "";
17733 IRTemp sV = newTemp(Ity_V128);
17734 IRTemp sHi = newTemp(Ity_I64);
17735 IRTemp sLo = newTemp(Ity_I64);
17736 IRTemp dLo = newTemp(Ity_I64);
17737 UInt rG = gregOfRexRM(pfx,modrm);
17738 if (epartIsReg(modrm)) {
17739 UInt rE = eregOfRexRM(pfx,modrm);
17740 assign( sV, getXMMReg(rE) );
17741 delta += 1;
17742 DIP("%sphminposuw %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
17743 } else {
17744 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
17745 if (!isAvx)
17746 gen_SEGV_if_not_16_aligned(addr);
17747 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
17748 delta += alen;
17749 DIP("%sphminposuw %s,%s\n", mbV, dis_buf, nameXMMReg(rG));
17751 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
17752 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
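   /* A clean helper can't take a V128 by value (see the comments in
      dis_AESx below), so pass the vector as two 64-bit halves. */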
17753 assign( dLo, mkIRExprCCall(
17754 Ity_I64, 0/*regparms*/,
17755 "amd64g_calculate_sse_phminposuw",
17756 &amd64g_calculate_sse_phminposuw,
17757 mkIRExprVec_2( mkexpr(sLo), mkexpr(sHi) )
17759 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17760 (rG, unop(Iop_64UtoV128, mkexpr(dLo)));
17761 return delta;
17765 static Long dis_AESx ( const VexAbiInfo* vbi, Prefix pfx,
17766 Long delta, Bool isAvx, UChar opc )
17768 IRTemp addr = IRTemp_INVALID;
17769 Int alen = 0;
17770 HChar dis_buf[50];
17771 UChar modrm = getUChar(delta);
17772 UInt rG = gregOfRexRM(pfx, modrm);
17773 UInt regNoL = 0;
17774 UInt regNoR = (isAvx && opc != 0xDB) ? getVexNvvvv(pfx) : rG;
17776 /* This is a nasty kludge. We need to pass 2 x V128 to the
17777 helper. Since we can't do that, use a dirty
17778 helper to compute the results directly from the XMM regs in
17779 the guest state. That means for the memory case, we need to
17780 move the left operand into a pseudo-register (XMM16, let's
17781 call it). */
17782 if (epartIsReg(modrm)) {
17783 regNoL = eregOfRexRM(pfx, modrm);
17784 delta += 1;
17785 } else {
17786 regNoL = 16; /* use XMM16 as an intermediary */
17787 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17788 /* alignment check needed ???? */
17789 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
17790 delta += alen;
17793 void* fn = &amd64g_dirtyhelper_AES;
17794 const HChar* nm = "amd64g_dirtyhelper_AES";
17796 /* Round up the arguments. Note that this is a kludge -- the
17797 use of mkU64 rather than mkIRExpr_HWord implies the
17798 assumption that the host's word size is 64-bit. */
17799 UInt gstOffD = ymmGuestRegOffset(rG);
17800 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
17801 UInt gstOffR = ymmGuestRegOffset(regNoR);
17802 IRExpr* opc4 = mkU64(opc);
17803 IRExpr* gstOffDe = mkU64(gstOffD);
17804 IRExpr* gstOffLe = mkU64(gstOffL);
17805 IRExpr* gstOffRe = mkU64(gstOffR);
17806 IRExpr** args
17807 = mkIRExprVec_5( IRExpr_GSPTR(), opc4, gstOffDe, gstOffLe, gstOffRe );
17809 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
17810 /* It's not really a dirty call, but we can't use the clean helper
17811 mechanism here for the very lame reason that we can't pass 2 x
17812 V128s by value to a helper. Hence this roundabout scheme. */
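   /* The fxState annotations below spell out which guest-state slices the
      helper reads and writes, so that the IR optimiser and instrumenting
      tools (Memcheck in particular) can account for its effects. */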
17813 d->nFxState = 2;
17814 vex_bzero(&d->fxState, sizeof(d->fxState));
17815    /* AES{ENC,ENCLAST,DEC,DECLAST} read both registers, and write
17816 the second for !isAvx or the third for isAvx.
17817 AESIMC (0xDB) reads the first register, and writes the second. */
17818 d->fxState[0].fx = Ifx_Read;
17819 d->fxState[0].offset = gstOffL;
17820 d->fxState[0].size = sizeof(U128);
17821 d->fxState[1].offset = gstOffR;
17822 d->fxState[1].size = sizeof(U128);
17823 if (opc == 0xDB)
17824 d->fxState[1].fx = Ifx_Write;
17825 else if (!isAvx || rG == regNoR)
17826 d->fxState[1].fx = Ifx_Modify;
17827 else {
17828 d->fxState[1].fx = Ifx_Read;
17829 d->nFxState++;
17830 d->fxState[2].fx = Ifx_Write;
17831 d->fxState[2].offset = gstOffD;
17832 d->fxState[2].size = sizeof(U128);
17835 stmt( IRStmt_Dirty(d) );
17837 const HChar* opsuf;
17838 switch (opc) {
17839 case 0xDC: opsuf = "enc"; break;
17840       case 0xDD: opsuf = "enclast"; break;
17841 case 0xDE: opsuf = "dec"; break;
17842 case 0xDF: opsuf = "declast"; break;
17843 case 0xDB: opsuf = "imc"; break;
17844 default: vassert(0);
17846 DIP("%saes%s %s,%s%s%s\n", isAvx ? "v" : "", opsuf,
17847 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
17848 nameXMMReg(regNoR),
17849 (isAvx && opc != 0xDB) ? "," : "",
17850 (isAvx && opc != 0xDB) ? nameXMMReg(rG) : "");
17852 if (isAvx)
17853 putYMMRegLane128( rG, 1, mkV128(0) );
17854 return delta;
17857 static Long dis_AESKEYGENASSIST ( const VexAbiInfo* vbi, Prefix pfx,
17858 Long delta, Bool isAvx )
17860 IRTemp addr = IRTemp_INVALID;
17861 Int alen = 0;
17862 HChar dis_buf[50];
17863 UChar modrm = getUChar(delta);
17864 UInt regNoL = 0;
17865 UInt regNoR = gregOfRexRM(pfx, modrm);
17866 UChar imm = 0;
17868 /* This is a nasty kludge. See AESENC et al. instructions. */
17869 modrm = getUChar(delta);
17870 if (epartIsReg(modrm)) {
17871 regNoL = eregOfRexRM(pfx, modrm);
17872 imm = getUChar(delta+1);
17873 delta += 1+1;
17874 } else {
17875 regNoL = 16; /* use XMM16 as an intermediary */
17876 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17877       /* alignment check needed ???? */
17878 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
17879 imm = getUChar(delta+alen);
17880 delta += alen+1;
17883 /* Who ya gonna call? Presumably not Ghostbusters. */
17884 void* fn = &amd64g_dirtyhelper_AESKEYGENASSIST;
17885 const HChar* nm = "amd64g_dirtyhelper_AESKEYGENASSIST";
17887 /* Round up the arguments. Note that this is a kludge -- the
17888 use of mkU64 rather than mkIRExpr_HWord implies the
17889 assumption that the host's word size is 64-bit. */
17890 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
17891 UInt gstOffR = ymmGuestRegOffset(regNoR);
17893 IRExpr* imme = mkU64(imm & 0xFF);
17894 IRExpr* gstOffLe = mkU64(gstOffL);
17895 IRExpr* gstOffRe = mkU64(gstOffR);
17896 IRExpr** args
17897 = mkIRExprVec_4( IRExpr_GSPTR(), imme, gstOffLe, gstOffRe );
17899 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
17900 /* It's not really a dirty call, but we can't use the clean helper
17901 mechanism here for the very lame reason that we can't pass 2 x
17902 V128s by value to a helper. Hence this roundabout scheme. */
17903 d->nFxState = 2;
17904 vex_bzero(&d->fxState, sizeof(d->fxState));
17905 d->fxState[0].fx = Ifx_Read;
17906 d->fxState[0].offset = gstOffL;
17907 d->fxState[0].size = sizeof(U128);
17908 d->fxState[1].fx = Ifx_Write;
17909 d->fxState[1].offset = gstOffR;
17910 d->fxState[1].size = sizeof(U128);
17911 stmt( IRStmt_Dirty(d) );
17913 DIP("%saeskeygenassist $%x,%s,%s\n", isAvx ? "v" : "", (UInt)imm,
17914 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
17915 nameXMMReg(regNoR));
17916 if (isAvx)
17917 putYMMRegLane128( regNoR, 1, mkV128(0) );
17918 return delta;
17922 __attribute__((noinline))
17923 static
17924 Long dis_ESC_0F38__SSE4 ( Bool* decode_OK,
17925 const VexAbiInfo* vbi,
17926 Prefix pfx, Int sz, Long deltaIN )
17928 IRTemp addr = IRTemp_INVALID;
17929 UChar modrm = 0;
17930 Int alen = 0;
17931 HChar dis_buf[50];
17933 *decode_OK = False;
17935 Long delta = deltaIN;
17936 UChar opc = getUChar(delta);
17937 delta++;
17938 switch (opc) {
17940 case 0x10:
17941 case 0x14:
17942 case 0x15:
17943 /* 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128 (byte gran)
17944 66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128 (float gran)
17945 66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128 (double gran)
17946 Blend at various granularities, with XMM0 (implicit operand)
17947          providing the controlling mask.  */
17949 if (have66noF2noF3(pfx) && sz == 2) {
17950 modrm = getUChar(delta);
17952 const HChar* nm = NULL;
17953 UInt gran = 0;
17954 IROp opSAR = Iop_INVALID;
17955 switch (opc) {
17956 case 0x10:
17957 nm = "pblendvb"; gran = 1; opSAR = Iop_SarN8x16;
17958 break;
17959 case 0x14:
17960 nm = "blendvps"; gran = 4; opSAR = Iop_SarN32x4;
17961 break;
17962 case 0x15:
17963 nm = "blendvpd"; gran = 8; opSAR = Iop_SarN64x2;
17964 break;
17966 vassert(nm);
17968 IRTemp vecE = newTemp(Ity_V128);
17969 IRTemp vecG = newTemp(Ity_V128);
17970 IRTemp vec0 = newTemp(Ity_V128);
17972 if ( epartIsReg(modrm) ) {
17973 assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm)));
17974 delta += 1;
17975 DIP( "%s %s,%s\n", nm,
17976 nameXMMReg( eregOfRexRM(pfx, modrm) ),
17977 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17978 } else {
17979 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17980 gen_SEGV_if_not_16_aligned( addr );
17981 assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
17982 delta += alen;
17983 DIP( "%s %s,%s\n", nm,
17984 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17987 assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm)));
17988 assign(vec0, getXMMReg(0));
17990 IRTemp res = math_PBLENDVB_128( vecE, vecG, vec0, gran, opSAR );
17991 putXMMReg(gregOfRexRM(pfx, modrm), mkexpr(res));
17993 goto decode_success;
17995 break;
17997 case 0x17:
17998 /* 66 0F 38 17 /r = PTEST xmm1, xmm2/m128
17999 Logical compare (set ZF and CF from AND/ANDN of the operands) */
18000 if (have66noF2noF3(pfx)
18001 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
18002 delta = dis_xTESTy_128( vbi, pfx, delta, False/*!isAvx*/, 0 );
18003 goto decode_success;
18005 break;
18007 case 0x20:
18008 /* 66 0F 38 20 /r = PMOVSXBW xmm1, xmm2/m64
18009 Packed Move with Sign Extend from Byte to Word (XMM) */
18010 if (have66noF2noF3(pfx) && sz == 2) {
18011 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
18012 False/*!isAvx*/, False/*!xIsZ*/ );
18013 goto decode_success;
18015 break;
18017 case 0x21:
18018 /* 66 0F 38 21 /r = PMOVSXBD xmm1, xmm2/m32
18019 Packed Move with Sign Extend from Byte to DWord (XMM) */
18020 if (have66noF2noF3(pfx) && sz == 2) {
18021 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
18022 False/*!isAvx*/, False/*!xIsZ*/ );
18023 goto decode_success;
18025 break;
18027 case 0x22:
18028 /* 66 0F 38 22 /r = PMOVSXBQ xmm1, xmm2/m16
18029 Packed Move with Sign Extend from Byte to QWord (XMM) */
18030 if (have66noF2noF3(pfx) && sz == 2) {
18031 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
18032 goto decode_success;
18034 break;
18036 case 0x23:
18037 /* 66 0F 38 23 /r = PMOVSXWD xmm1, xmm2/m64
18038 Packed Move with Sign Extend from Word to DWord (XMM) */
18039 if (have66noF2noF3(pfx) && sz == 2) {
18040 delta = dis_PMOVxXWD_128(vbi, pfx, delta,
18041 False/*!isAvx*/, False/*!xIsZ*/);
18042 goto decode_success;
18044 break;
18046 case 0x24:
18047 /* 66 0F 38 24 /r = PMOVSXWQ xmm1, xmm2/m32
18048 Packed Move with Sign Extend from Word to QWord (XMM) */
18049 if (have66noF2noF3(pfx) && sz == 2) {
18050 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
18051 goto decode_success;
18053 break;
18055 case 0x25:
18056 /* 66 0F 38 25 /r = PMOVSXDQ xmm1, xmm2/m64
18057 Packed Move with Sign Extend from Double Word to Quad Word (XMM) */
18058 if (have66noF2noF3(pfx) && sz == 2) {
18059 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
18060 False/*!isAvx*/, False/*!xIsZ*/ );
18061 goto decode_success;
18063 break;
18065 case 0x28:
18066       /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-bit
18067          lanes 0 x 0 to form the lower 64-bit half, and of lanes 2 x 2
18068          to form the upper 64-bit half */
18069 /* This is a really poor translation -- could be improved if
18070 performance critical. It's a copy-paste of PMULUDQ, too. */
18071 if (have66noF2noF3(pfx) && sz == 2) {
18072 IRTemp sV = newTemp(Ity_V128);
18073 IRTemp dV = newTemp(Ity_V128);
18074 modrm = getUChar(delta);
18075 UInt rG = gregOfRexRM(pfx,modrm);
18076 assign( dV, getXMMReg(rG) );
18077 if (epartIsReg(modrm)) {
18078 UInt rE = eregOfRexRM(pfx,modrm);
18079 assign( sV, getXMMReg(rE) );
18080 delta += 1;
18081 DIP("pmuldq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
18082 } else {
18083 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
18084 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
18085 delta += alen;
18086 DIP("pmuldq %s,%s\n", dis_buf, nameXMMReg(rG));
18089 putXMMReg( rG, mkexpr(math_PMULDQ_128( dV, sV )) );
18090 goto decode_success;
18092 break;
18094 case 0x29:
18095 /* 66 0F 38 29 = PCMPEQQ
18096 64x2 equality comparison */
18097 if (have66noF2noF3(pfx) && sz == 2) {
18098 /* FIXME: this needs an alignment check */
18099 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
18100 "pcmpeqq", Iop_CmpEQ64x2, False );
18101 goto decode_success;
18103 break;
18105 case 0x2A:
18106 /* 66 0F 38 2A = MOVNTDQA
18107          a "non-temporal" ("streaming") load.
18108          Handle like MOVDQA, except that only a memory operand is allowed */
18109 if (have66noF2noF3(pfx) && sz == 2) {
18110 modrm = getUChar(delta);
18111 if (!epartIsReg(modrm)) {
18112 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
18113 gen_SEGV_if_not_16_aligned( addr );
18114 putXMMReg( gregOfRexRM(pfx,modrm),
18115 loadLE(Ity_V128, mkexpr(addr)) );
18116 DIP("movntdqa %s,%s\n", dis_buf,
18117 nameXMMReg(gregOfRexRM(pfx,modrm)));
18118 delta += alen;
18119 goto decode_success;
18122 break;
18124 case 0x2B:
18125 /* 66 0f 38 2B /r = PACKUSDW xmm1, xmm2/m128
18126 2x 32x4 S->U saturating narrow from xmm2/m128 to xmm1 */
18127 if (have66noF2noF3(pfx) && sz == 2) {
18129 modrm = getUChar(delta);
18131 IRTemp argL = newTemp(Ity_V128);
18132 IRTemp argR = newTemp(Ity_V128);
18134 if ( epartIsReg(modrm) ) {
18135 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
18136 delta += 1;
18137 DIP( "packusdw %s,%s\n",
18138 nameXMMReg( eregOfRexRM(pfx, modrm) ),
18139 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18140 } else {
18141 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
18142 gen_SEGV_if_not_16_aligned( addr );
18143 assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
18144 delta += alen;
18145 DIP( "packusdw %s,%s\n",
18146 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18149 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
18151 putXMMReg( gregOfRexRM(pfx, modrm),
18152 binop( Iop_QNarrowBin32Sto16Ux8,
18153 mkexpr(argL), mkexpr(argR)) );
18155 goto decode_success;
18157 break;
18159 case 0x30:
18160 /* 66 0F 38 30 /r = PMOVZXBW xmm1, xmm2/m64
18161 Packed Move with Zero Extend from Byte to Word (XMM) */
18162 if (have66noF2noF3(pfx) && sz == 2) {
18163 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
18164 False/*!isAvx*/, True/*xIsZ*/ );
18165 goto decode_success;
18167 break;
18169 case 0x31:
18170 /* 66 0F 38 31 /r = PMOVZXBD xmm1, xmm2/m32
18171 Packed Move with Zero Extend from Byte to DWord (XMM) */
18172 if (have66noF2noF3(pfx) && sz == 2) {
18173 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
18174 False/*!isAvx*/, True/*xIsZ*/ );
18175 goto decode_success;
18177 break;
18179 case 0x32:
18180 /* 66 0F 38 32 /r = PMOVZXBQ xmm1, xmm2/m16
18181 Packed Move with Zero Extend from Byte to QWord (XMM) */
18182 if (have66noF2noF3(pfx) && sz == 2) {
18183 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
18184 goto decode_success;
18186 break;
18188 case 0x33:
18189 /* 66 0F 38 33 /r = PMOVZXWD xmm1, xmm2/m64
18190 Packed Move with Zero Extend from Word to DWord (XMM) */
18191 if (have66noF2noF3(pfx) && sz == 2) {
18192 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
18193 False/*!isAvx*/, True/*xIsZ*/ );
18194 goto decode_success;
18196 break;
18198 case 0x34:
18199 /* 66 0F 38 34 /r = PMOVZXWQ xmm1, xmm2/m32
18200 Packed Move with Zero Extend from Word to QWord (XMM) */
18201 if (have66noF2noF3(pfx) && sz == 2) {
18202 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
18203 goto decode_success;
18205 break;
18207 case 0x35:
18208 /* 66 0F 38 35 /r = PMOVZXDQ xmm1, xmm2/m64
18209 Packed Move with Zero Extend from DWord to QWord (XMM) */
18210 if (have66noF2noF3(pfx) && sz == 2) {
18211 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
18212 False/*!isAvx*/, True/*xIsZ*/ );
18213 goto decode_success;
18215 break;
18217 case 0x37:
18218 /* 66 0F 38 37 = PCMPGTQ
18219          64x2 comparison (signed, presumably; the Intel docs don't say :-) */
18221 if (have66noF2noF3(pfx) && sz == 2) {
18222 /* FIXME: this needs an alignment check */
18223 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
18224 "pcmpgtq", Iop_CmpGT64Sx2, False );
18225 goto decode_success;
18227 break;
18229 case 0x38:
18230 case 0x3C:
18231 /* 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128 8Sx16 (signed) min
18232          66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128   8Sx16 (signed) max  */
18234 if (have66noF2noF3(pfx) && sz == 2) {
18235 /* FIXME: this needs an alignment check */
18236 Bool isMAX = opc == 0x3C;
18237 delta = dis_SSEint_E_to_G(
18238 vbi, pfx, delta,
18239 isMAX ? "pmaxsb" : "pminsb",
18240 isMAX ? Iop_Max8Sx16 : Iop_Min8Sx16,
18241 False
18243 goto decode_success;
18245 break;
18247 case 0x39:
18248 case 0x3D:
18249 /* 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128
18250 Minimum of Packed Signed Double Word Integers (XMM)
18251 66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128
18252          Maximum of Packed Signed Double Word Integers (XMM)  */
18254 if (have66noF2noF3(pfx) && sz == 2) {
18255 /* FIXME: this needs an alignment check */
18256 Bool isMAX = opc == 0x3D;
18257 delta = dis_SSEint_E_to_G(
18258 vbi, pfx, delta,
18259 isMAX ? "pmaxsd" : "pminsd",
18260 isMAX ? Iop_Max32Sx4 : Iop_Min32Sx4,
18261 False
18263 goto decode_success;
18265 break;
18267 case 0x3A:
18268 case 0x3E:
18269 /* 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128
18270 Minimum of Packed Unsigned Word Integers (XMM)
18271 66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128
18272          Maximum of Packed Unsigned Word Integers (XMM)  */
18274 if (have66noF2noF3(pfx) && sz == 2) {
18275 /* FIXME: this needs an alignment check */
18276 Bool isMAX = opc == 0x3E;
18277 delta = dis_SSEint_E_to_G(
18278 vbi, pfx, delta,
18279 isMAX ? "pmaxuw" : "pminuw",
18280 isMAX ? Iop_Max16Ux8 : Iop_Min16Ux8,
18281 False
18283 goto decode_success;
18285 break;
18287 case 0x3B:
18288 case 0x3F:
18289 /* 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128
18290 Minimum of Packed Unsigned Doubleword Integers (XMM)
18291 66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128
18292          Maximum of Packed Unsigned Doubleword Integers (XMM)  */
18294 if (have66noF2noF3(pfx) && sz == 2) {
18295 /* FIXME: this needs an alignment check */
18296 Bool isMAX = opc == 0x3F;
18297 delta = dis_SSEint_E_to_G(
18298 vbi, pfx, delta,
18299 isMAX ? "pmaxud" : "pminud",
18300 isMAX ? Iop_Max32Ux4 : Iop_Min32Ux4,
18301 False
18303 goto decode_success;
18305 break;
18307 case 0x40:
18308 /* 66 0F 38 40 /r = PMULLD xmm1, xmm2/m128
18309 32x4 integer multiply from xmm2/m128 to xmm1 */
18310 if (have66noF2noF3(pfx) && sz == 2) {
18312 modrm = getUChar(delta);
18314 IRTemp argL = newTemp(Ity_V128);
18315 IRTemp argR = newTemp(Ity_V128);
18317 if ( epartIsReg(modrm) ) {
18318 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
18319 delta += 1;
18320 DIP( "pmulld %s,%s\n",
18321 nameXMMReg( eregOfRexRM(pfx, modrm) ),
18322 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18323 } else {
18324 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
18325 gen_SEGV_if_not_16_aligned( addr );
18326 assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
18327 delta += alen;
18328 DIP( "pmulld %s,%s\n",
18329 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18332 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
18334 putXMMReg( gregOfRexRM(pfx, modrm),
18335 binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) );
18337 goto decode_success;
18339 break;
18341 case 0x41:
18342 /* 66 0F 38 41 /r = PHMINPOSUW xmm1, xmm2/m128
18343 Packed Horizontal Word Minimum from xmm2/m128 to xmm1 */
18344 if (have66noF2noF3(pfx) && sz == 2) {
18345 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, False/*!isAvx*/ );
18346 goto decode_success;
18348 break;
18350 case 0xDC:
18351 case 0xDD:
18352 case 0xDE:
18353 case 0xDF:
18354 case 0xDB:
18355 /* 66 0F 38 DC /r = AESENC xmm1, xmm2/m128
18356 DD /r = AESENCLAST xmm1, xmm2/m128
18357 DE /r = AESDEC xmm1, xmm2/m128
18358 DF /r = AESDECLAST xmm1, xmm2/m128
18360 DB /r = AESIMC xmm1, xmm2/m128 */
18361 if (have66noF2noF3(pfx) && sz == 2) {
18362 delta = dis_AESx( vbi, pfx, delta, False/*!isAvx*/, opc );
18363 goto decode_success;
18365 break;
18367 case 0xF0:
18368 case 0xF1:
18369 /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok)
18370 F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32
18371          The decoding on this is a bit unusual.  */
18373 if (haveF2noF3(pfx)
18374 && (opc == 0xF1 || (opc == 0xF0 && !have66(pfx)))) {
18375 modrm = getUChar(delta);
18377 if (opc == 0xF0)
18378 sz = 1;
18379 else
18380 vassert(sz == 2 || sz == 4 || sz == 8);
18382 IRType tyE = szToITy(sz);
18383 IRTemp valE = newTemp(tyE);
18385 if (epartIsReg(modrm)) {
18386 assign(valE, getIRegE(sz, pfx, modrm));
18387 delta += 1;
18388 DIP("crc32b %s,%s\n", nameIRegE(sz, pfx, modrm),
18389 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
18390 } else {
18391 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
18392 assign(valE, loadLE(tyE, mkexpr(addr)));
18393 delta += alen;
18394 DIP("crc32b %s,%s\n", dis_buf,
18395 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
18398 /* Somewhat funny getting/putting of the crc32 value, in order
18399 to ensure that it turns into 64-bit gets and puts. However,
18400 mask off the upper 32 bits so as to not get memcheck false
18401 +ves around the helper call. */
18402 IRTemp valG0 = newTemp(Ity_I64);
18403 assign(valG0, binop(Iop_And64, getIRegG(8, pfx, modrm),
18404 mkU64(0xFFFFFFFF)));
18406 const HChar* nm = NULL;
18407 void* fn = NULL;
18408 switch (sz) {
18409 case 1: nm = "amd64g_calc_crc32b";
18410 fn = &amd64g_calc_crc32b; break;
18411 case 2: nm = "amd64g_calc_crc32w";
18412 fn = &amd64g_calc_crc32w; break;
18413 case 4: nm = "amd64g_calc_crc32l";
18414 fn = &amd64g_calc_crc32l; break;
18415 case 8: nm = "amd64g_calc_crc32q";
18416 fn = &amd64g_calc_crc32q; break;
18418 vassert(nm && fn);
18419 IRTemp valG1 = newTemp(Ity_I64);
18420 assign(valG1,
18421 mkIRExprCCall(Ity_I64, 0/*regparm*/, nm, fn,
18422 mkIRExprVec_2(mkexpr(valG0),
18423 widenUto64(mkexpr(valE)))));
18425 putIRegG(4, pfx, modrm, unop(Iop_64to32, mkexpr(valG1)));
18426 goto decode_success;
18428 break;
18430 default:
18431 break;
18435 //decode_failure:
18436 *decode_OK = False;
18437 return deltaIN;
18439 decode_success:
18440 *decode_OK = True;
18441 return delta;
18445 /*------------------------------------------------------------*/
18446 /*--- ---*/
18447 /*--- Top-level SSE4: dis_ESC_0F3A__SSE4 ---*/
18448 /*--- ---*/
18449 /*------------------------------------------------------------*/
18451 static Long dis_PEXTRW ( const VexAbiInfo* vbi, Prefix pfx,
18452 Long delta, Bool isAvx )
18454 IRTemp addr = IRTemp_INVALID;
18455 IRTemp t0 = IRTemp_INVALID;
18456 IRTemp t1 = IRTemp_INVALID;
18457 IRTemp t2 = IRTemp_INVALID;
18458 IRTemp t3 = IRTemp_INVALID;
18459 UChar modrm = getUChar(delta);
18460 Int alen = 0;
18461 HChar dis_buf[50];
18462 UInt rG = gregOfRexRM(pfx,modrm);
18463 Int imm8_20;
18464 IRTemp xmm_vec = newTemp(Ity_V128);
18465 IRTemp d16 = newTemp(Ity_I16);
18466 const HChar* mbV = isAvx ? "v" : "";
18468 vassert(0==getRexW(pfx)); /* ensured by caller */
18469 assign( xmm_vec, getXMMReg(rG) );
18470 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
18472 if ( epartIsReg( modrm ) ) {
18473 imm8_20 = (Int)(getUChar(delta+1) & 7);
18474 } else {
18475 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18476 imm8_20 = (Int)(getUChar(delta+alen) & 7);
18479 switch (imm8_20) {
18480 case 0: assign(d16, unop(Iop_32to16, mkexpr(t0))); break;
18481 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(t0))); break;
18482 case 2: assign(d16, unop(Iop_32to16, mkexpr(t1))); break;
18483 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(t1))); break;
18484 case 4: assign(d16, unop(Iop_32to16, mkexpr(t2))); break;
18485 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(t2))); break;
18486 case 6: assign(d16, unop(Iop_32to16, mkexpr(t3))); break;
18487 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(t3))); break;
18488 default: vassert(0);
18491 if ( epartIsReg( modrm ) ) {
18492 UInt rE = eregOfRexRM(pfx,modrm);
18493 putIReg32( rE, unop(Iop_16Uto32, mkexpr(d16)) );
18494 delta += 1+1;
18495 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20,
18496 nameXMMReg( rG ), nameIReg32( rE ) );
18497 } else {
18498 storeLE( mkexpr(addr), mkexpr(d16) );
18499 delta += alen+1;
18500 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20, nameXMMReg( rG ), dis_buf );
18502 return delta;
18506 static Long dis_PEXTRD ( const VexAbiInfo* vbi, Prefix pfx,
18507 Long delta, Bool isAvx )
18509 IRTemp addr = IRTemp_INVALID;
18510 IRTemp t0 = IRTemp_INVALID;
18511 IRTemp t1 = IRTemp_INVALID;
18512 IRTemp t2 = IRTemp_INVALID;
18513 IRTemp t3 = IRTemp_INVALID;
18514 UChar modrm = 0;
18515 Int alen = 0;
18516 HChar dis_buf[50];
18518 Int imm8_10;
18519 IRTemp xmm_vec = newTemp(Ity_V128);
18520 IRTemp src_dword = newTemp(Ity_I32);
18521 const HChar* mbV = isAvx ? "v" : "";
18523 vassert(0==getRexW(pfx)); /* ensured by caller */
18524 modrm = getUChar(delta);
18525 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
18526 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
18528 if ( epartIsReg( modrm ) ) {
18529 imm8_10 = (Int)(getUChar(delta+1) & 3);
18530 } else {
18531 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18532 imm8_10 = (Int)(getUChar(delta+alen) & 3);
18535 switch ( imm8_10 ) {
18536 case 0: assign( src_dword, mkexpr(t0) ); break;
18537 case 1: assign( src_dword, mkexpr(t1) ); break;
18538 case 2: assign( src_dword, mkexpr(t2) ); break;
18539 case 3: assign( src_dword, mkexpr(t3) ); break;
18540 default: vassert(0);
18543 if ( epartIsReg( modrm ) ) {
18544 putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) );
18545 delta += 1+1;
18546 DIP( "%spextrd $%d, %s,%s\n", mbV, imm8_10,
18547 nameXMMReg( gregOfRexRM(pfx, modrm) ),
18548 nameIReg32( eregOfRexRM(pfx, modrm) ) );
18549 } else {
18550 storeLE( mkexpr(addr), mkexpr(src_dword) );
18551 delta += alen+1;
18552 DIP( "%spextrd $%d, %s,%s\n", mbV,
18553 imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
18555 return delta;
18559 static Long dis_PEXTRQ ( const VexAbiInfo* vbi, Prefix pfx,
18560 Long delta, Bool isAvx )
18562 IRTemp addr = IRTemp_INVALID;
18563 UChar modrm = 0;
18564 Int alen = 0;
18565 HChar dis_buf[50];
18567 Int imm8_0;
18568 IRTemp xmm_vec = newTemp(Ity_V128);
18569 IRTemp src_qword = newTemp(Ity_I64);
18570 const HChar* mbV = isAvx ? "v" : "";
18572 vassert(1==getRexW(pfx)); /* ensured by caller */
18573 modrm = getUChar(delta);
18574 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
18576 if ( epartIsReg( modrm ) ) {
18577 imm8_0 = (Int)(getUChar(delta+1) & 1);
18578 } else {
18579 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18580 imm8_0 = (Int)(getUChar(delta+alen) & 1);
18583 switch ( imm8_0 ) {
18584 case 0: assign( src_qword, unop(Iop_V128to64, mkexpr(xmm_vec)) );
18585 break;
18586 case 1: assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) );
18587 break;
18588 default: vassert(0);
18591 if ( epartIsReg( modrm ) ) {
18592 putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) );
18593 delta += 1+1;
18594 DIP( "%spextrq $%d, %s,%s\n", mbV, imm8_0,
18595 nameXMMReg( gregOfRexRM(pfx, modrm) ),
18596 nameIReg64( eregOfRexRM(pfx, modrm) ) );
18597 } else {
18598 storeLE( mkexpr(addr), mkexpr(src_qword) );
18599 delta += alen+1;
18600 DIP( "%spextrq $%d, %s,%s\n", mbV,
18601 imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
18603 return delta;
18606 static IRExpr* math_CTZ32(IRExpr *exp)
18608 /* Iop_Ctz32 isn't implemented by the amd64 back end, so use Iop_Ctz64. */
18609 return unop(Iop_64to32, unop(Iop_Ctz64, unop(Iop_32Uto64, exp)));
18612 static Long dis_PCMPISTRI_3A ( UChar modrm, UInt regNoL, UInt regNoR,
18613 Long delta, UChar opc, UChar imm,
18614 HChar dis_buf[])
18616 /* We only handle PCMPISTRI for now */
18617 vassert((opc & 0x03) == 0x03);
18618 /* And only an immediate byte of 0x38 or 0x3A */
18619 vassert((imm & ~0x02) == 0x38);
18621    /* FIXME: Is this correct when regNoL == 16 ? */
18622 IRTemp argL = newTemp(Ity_V128);
18623 assign(argL, getXMMReg(regNoL));
18624 IRTemp argR = newTemp(Ity_V128);
18625 assign(argR, getXMMReg(regNoR));
18627 IRTemp zmaskL = newTemp(Ity_I32);
18628 assign(zmaskL, unop(Iop_16Uto32,
18629 unop(Iop_GetMSBs8x16,
18630 binop(Iop_CmpEQ8x16, mkexpr(argL), mkV128(0)))));
18631 IRTemp zmaskR = newTemp(Ity_I32);
18632 assign(zmaskR, unop(Iop_16Uto32,
18633 unop(Iop_GetMSBs8x16,
18634 binop(Iop_CmpEQ8x16, mkexpr(argR), mkV128(0)))));
18636 /* We want validL = ~(zmaskL | -zmaskL)
18638 But this formulation kills memcheck's validity tracking when any
18639 bits above the first "1" are invalid. So reformulate as:
18641 validL = (zmaskL ? (1 << ctz(zmaskL)) : 0) - 1  */
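/* Worked example of the formula above: if the first zero byte of argL
   is at byte position 3, then zmaskL has bit 3 set (and possibly
   higher bits too), so ctz(zmaskL) == 3 and validL == (1 << 3) - 1
   == 0x7 -- only the bytes before the terminating zero are marked
   valid.  If zmaskL == 0 (no zero byte at all), validL == 0 - 1,
   i.e. all-ones, so every byte is valid. */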
18644 IRExpr *ctzL = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskL)));
18646 /* Generate a bool expression which is zero iff the original is
18647 zero. Do this carefully so memcheck can propagate validity bits
18648 correctly. */
18650 IRTemp zmaskL_zero = newTemp(Ity_I1);
18651 assign(zmaskL_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskL), mkU32(0)));
18653 IRTemp validL = newTemp(Ity_I32);
18654 assign(validL, binop(Iop_Sub32,
18655 IRExpr_ITE(mkexpr(zmaskL_zero),
18656 binop(Iop_Shl32, mkU32(1), ctzL),
18657 mkU32(0)),
18658 mkU32(1)));
18660 /* And similarly for validR. */
18661 IRExpr *ctzR = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskR)));
18662 IRTemp zmaskR_zero = newTemp(Ity_I1);
18663 assign(zmaskR_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskR), mkU32(0)));
18664 IRTemp validR = newTemp(Ity_I32);
18665 assign(validR, binop(Iop_Sub32,
18666 IRExpr_ITE(mkexpr(zmaskR_zero),
18667 binop(Iop_Shl32, mkU32(1), ctzR),
18668 mkU32(0)),
18669 mkU32(1)));
18671 /* Do the actual comparison. */
18672 IRExpr *boolResII = unop(Iop_16Uto32,
18673 unop(Iop_GetMSBs8x16,
18674 binop(Iop_CmpEQ8x16, mkexpr(argL),
18675 mkexpr(argR))));
18677 /* Compute boolresII & validL & validR (i.e., if both valid, use
18678 comparison result) */
18679 IRExpr *intRes1_a = binop(Iop_And32, boolResII,
18680 binop(Iop_And32,
18681 mkexpr(validL), mkexpr(validR)));
18683 /* Compute ~(validL | validR); i.e., if both invalid, force 1. */
18684 IRExpr *intRes1_b = unop(Iop_Not32, binop(Iop_Or32,
18685 mkexpr(validL), mkexpr(validR)));
18686 /* Otherwise, zero. */
18687 IRExpr *intRes1 = binop(Iop_And32, mkU32(0xFFFF),
18688 binop(Iop_Or32, intRes1_a, intRes1_b));
18690 /* The "0x30" bits in imm=0x3A select polarity 3, which means: XOR
18691 the comparison result with validL. */
18692 IRTemp intRes2 = newTemp(Ity_I32);
18693 assign(intRes2, binop(Iop_And32, mkU32(0xFFFF),
18694 binop(Iop_Xor32, intRes1, mkexpr(validL))));
18696 /* If the 0x40 bit were set in imm=0x3A, we would return the index
18697 of the msb. Since it is clear, we return the index of the
18698 lsb. */
18699 IRExpr *newECX = math_CTZ32(binop(Iop_Or32,
18700 mkexpr(intRes2), mkU32(0x10000)));
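/* ORing in 0x10000 guarantees that the ctz result is 16 when intRes2
   is zero, matching the architected PCMPISTRI behaviour of setting
   ECX to 16 (the number of byte elements) when there is no match. */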
18702 /* And that's our rcx. */
18703 putIReg32(R_RCX, newECX);
18705 /* Now for the condition codes... */
18707 /* C == 0 iff intRes2 == 0 */
18708 IRExpr *c_bit = IRExpr_ITE( binop(Iop_ExpCmpNE32, mkexpr(intRes2),
18709 mkU32(0)),
18710 mkU32(1 << AMD64G_CC_SHIFT_C),
18711 mkU32(0));
18712 /* Z == 1 iff any in argL is 0 */
18713 IRExpr *z_bit = IRExpr_ITE( mkexpr(zmaskL_zero),
18714 mkU32(1 << AMD64G_CC_SHIFT_Z),
18715 mkU32(0));
18716 /* S == 1 iff any in argR is 0 */
18717 IRExpr *s_bit = IRExpr_ITE( mkexpr(zmaskR_zero),
18718 mkU32(1 << AMD64G_CC_SHIFT_S),
18719 mkU32(0));
18720 /* O == IntRes2[0] */
18721 IRExpr *o_bit = binop(Iop_Shl32, binop(Iop_And32, mkexpr(intRes2),
18722 mkU32(0x01)),
18723 mkU8(AMD64G_CC_SHIFT_O));
18725 /* Put them all together */
18726 IRTemp cc = newTemp(Ity_I64);
18727 assign(cc, widenUto64(binop(Iop_Or32,
18728 binop(Iop_Or32, c_bit, z_bit),
18729 binop(Iop_Or32, s_bit, o_bit))));
18730 stmt(IRStmt_Put(OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY)));
18731 stmt(IRStmt_Put(OFFB_CC_DEP1, mkexpr(cc)));
18732 stmt(IRStmt_Put(OFFB_CC_DEP2, mkU64(0)));
18733 stmt(IRStmt_Put(OFFB_CC_NDEP, mkU64(0)));
18735 return delta;
18738 /* This can fail, in which case it returns the original (unchanged)
18739 delta. */
18740 static Long dis_PCMPxSTRx ( const VexAbiInfo* vbi, Prefix pfx,
18741 Long delta, Bool isAvx, UChar opc )
18743 Long delta0 = delta;
18744 UInt isISTRx = opc & 2;
18745 UInt isxSTRM = (opc & 1) ^ 1;
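/* Opcode layout: 0x60 = PCMPESTRM, 0x61 = PCMPESTRI, 0x62 = PCMPISTRM,
   0x63 = PCMPISTRI.  Bit 1 of opc distinguishes the implicit-length
   (I) forms from the explicit-length (E) forms, and bit 0
   distinguishes the index (I) results from the mask (M) results. */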
18746 UInt regNoL = 0;
18747 UInt regNoR = 0;
18748 UChar imm = 0;
18749 IRTemp addr = IRTemp_INVALID;
18750 Int alen = 0;
18751 HChar dis_buf[50];
18753 /* This is a nasty kludge. We need to pass 2 x V128 to the helper
18754 (which is clean). Since we can't do that, use a dirty helper to
18755 compute the results directly from the XMM regs in the guest
18756 state. That means for the memory case, we need to move the left
18757 operand into a pseudo-register (XMM16, let's call it). */
18758 UChar modrm = getUChar(delta);
18759 if (epartIsReg(modrm)) {
18760 regNoL = eregOfRexRM(pfx, modrm);
18761 regNoR = gregOfRexRM(pfx, modrm);
18762 imm = getUChar(delta+1);
18763 delta += 1+1;
18764 } else {
18765 regNoL = 16; /* use XMM16 as an intermediary */
18766 regNoR = gregOfRexRM(pfx, modrm);
18767 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18768 /* No alignment check; I guess that makes sense, given that
18769 these insns are for dealing with C style strings. */
18770 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
18771 imm = getUChar(delta+alen);
18772 delta += alen+1;
18775 /* Print the insn here, since dis_PCMPISTRI_3A doesn't do so
18776 itself. */
18777 if (regNoL == 16) {
18778 DIP("%spcmp%cstr%c $%x,%s,%s\n",
18779 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
18780 (UInt)imm, dis_buf, nameXMMReg(regNoR));
18781 } else {
18782 DIP("%spcmp%cstr%c $%x,%s,%s\n",
18783 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
18784 (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR));
18787 /* Handle special case(s). */
18788 if (imm == 0x3A && isISTRx && !isxSTRM) {
18789 return dis_PCMPISTRI_3A ( modrm, regNoL, regNoR, delta,
18790 opc, imm, dis_buf);
18793 /* Now we know the XMM reg numbers for the operands, and the
18794 immediate byte. Is it one we can actually handle? Throw out any
18795 cases for which the helper function has not been verified. */
18796 switch (imm) {
18797 case 0x00: case 0x02:
18798 case 0x08: case 0x0A: case 0x0C: case 0x0E:
18799 case 0x10: case 0x12: case 0x14:
18800 case 0x18: case 0x1A:
18801 case 0x30: case 0x34:
18802 case 0x38: case 0x3A:
18803 case 0x40: case 0x42: case 0x44: case 0x46:
18804 case 0x4A:
18805 case 0x62:
18806 case 0x70: case 0x72:
18807 break;
18808 // the 16-bit character versions of the above
18809 case 0x01: case 0x03:
18810 case 0x09: case 0x0B: case 0x0D:
18811 case 0x13:
18812 case 0x19: case 0x1B:
18813 case 0x39: case 0x3B:
18814 case 0x41: case 0x45:
18815 case 0x4B:
18816 break;
18817 default:
18818 return delta0; /*FAIL*/
18821 /* Who ya gonna call? Presumably not Ghostbusters. */
18822 void* fn = &amd64g_dirtyhelper_PCMPxSTRx;
18823 const HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx";
18825 /* Round up the arguments. Note that this is a kludge -- the use
18826 of mkU64 rather than mkIRExpr_HWord implies the assumption that
18827 the host's word size is 64-bit. */
18828 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
18829 UInt gstOffR = ymmGuestRegOffset(regNoR);
18831 IRExpr* opc4_and_imm = mkU64((opc << 8) | (imm & 0xFF));
18832 IRExpr* gstOffLe = mkU64(gstOffL);
18833 IRExpr* gstOffRe = mkU64(gstOffR);
18834 IRExpr* edxIN = isISTRx ? mkU64(0) : getIRegRDX(8);
18835 IRExpr* eaxIN = isISTRx ? mkU64(0) : getIRegRAX(8);
18836 IRExpr** args
18837 = mkIRExprVec_6( IRExpr_GSPTR(),
18838 opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN );
18840 IRTemp resT = newTemp(Ity_I64);
18841 IRDirty* d = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args );
18842 /* It's not really a dirty call, but we can't use the clean helper
18843 mechanism here for the very lame reason that we can't pass 2 x
18844 V128s by value to a helper. Hence this roundabout scheme. */
18845 d->nFxState = 2;
18846 vex_bzero(&d->fxState, sizeof(d->fxState));
18847 d->fxState[0].fx = Ifx_Read;
18848 d->fxState[0].offset = gstOffL;
18849 d->fxState[0].size = sizeof(U128);
18850 d->fxState[1].fx = Ifx_Read;
18851 d->fxState[1].offset = gstOffR;
18852 d->fxState[1].size = sizeof(U128);
18853 if (isxSTRM) {
18854 /* Declare that the helper writes XMM0. */
18855 d->nFxState = 3;
18856 d->fxState[2].fx = Ifx_Write;
18857 d->fxState[2].offset = ymmGuestRegOffset(0);
18858 d->fxState[2].size = sizeof(U128);
18861 stmt( IRStmt_Dirty(d) );
18863 /* Now resT[15:0] holds the new OSZACP values, so the condition
18864 codes must be updated. And for an xSTRI case, resT[31:16] holds
18865 the new ECX value, so stash that too. */
18866 if (!isxSTRM) {
18867 putIReg64(R_RCX, binop(Iop_And64,
18868 binop(Iop_Shr64, mkexpr(resT), mkU8(16)),
18869 mkU64(0xFFFF)));
18872 /* Zap the upper half of the dest reg as per AVX conventions. */
18873 if (isxSTRM && isAvx)
18874 putYMMRegLane128(/*YMM*/0, 1, mkV128(0));
18876 stmt( IRStmt_Put(
18877 OFFB_CC_DEP1,
18878 binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF))
18880 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
18881 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
18882 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
18884 return delta;
18888 static IRTemp math_PINSRB_128 ( IRTemp v128, IRTemp u8, UInt imm8 )
18890 vassert(imm8 >= 0 && imm8 <= 15);
18892 // Create a V128 value which has the selected byte in the
18893 // specified lane, and zeroes everywhere else.
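// For example, with imm8 == 9 the byte is shifted left by 8 bits and
// placed in the upper 64-bit half, i.e. byte lane 9 of the V128; the
// lane mask computed below (~(1 << 9) == 0xFDFF) then clears lane 9
// of v128 before the two values are ORed together.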
18894 IRTemp tmp128 = newTemp(Ity_V128);
18895 IRTemp halfshift = newTemp(Ity_I64);
18896 assign(halfshift, binop(Iop_Shl64,
18897 unop(Iop_8Uto64, mkexpr(u8)),
18898 mkU8(8 * (imm8 & 7))));
18899 if (imm8 < 8) {
18900 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
18901 } else {
18902 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
18905 UShort mask = ~(1 << imm8);
18906 IRTemp res = newTemp(Ity_V128);
18907 assign( res, binop(Iop_OrV128,
18908 mkexpr(tmp128),
18909 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
18910 return res;
18914 static IRTemp math_PINSRD_128 ( IRTemp v128, IRTemp u32, UInt imm8 )
18916 IRTemp z32 = newTemp(Ity_I32);
18917 assign(z32, mkU32(0));
18919 /* Surround u32 with zeroes as per imm, giving us something we can
18920 OR into a suitably masked-out v128.*/
18921 IRTemp withZs = newTemp(Ity_V128);
18922 UShort mask = 0;
18923 switch (imm8) {
18924 case 3: mask = 0x0FFF;
18925 assign(withZs, mkV128from32s(u32, z32, z32, z32));
18926 break;
18927 case 2: mask = 0xF0FF;
18928 assign(withZs, mkV128from32s(z32, u32, z32, z32));
18929 break;
18930 case 1: mask = 0xFF0F;
18931 assign(withZs, mkV128from32s(z32, z32, u32, z32));
18932 break;
18933 case 0: mask = 0xFFF0;
18934 assign(withZs, mkV128from32s(z32, z32, z32, u32));
18935 break;
18936 default: vassert(0);
18939 IRTemp res = newTemp(Ity_V128);
18940 assign(res, binop( Iop_OrV128,
18941 mkexpr(withZs),
18942 binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
18943 return res;
18947 static IRTemp math_PINSRQ_128 ( IRTemp v128, IRTemp u64, UInt imm8 )
18949 /* Surround u64 with zeroes as per imm, giving us something we can
18950 OR into a suitably masked-out v128.*/
18951 IRTemp withZs = newTemp(Ity_V128);
18952 UShort mask = 0;
18953 if (imm8 == 0) {
18954 mask = 0xFF00;
18955 assign(withZs, binop(Iop_64HLtoV128, mkU64(0), mkexpr(u64)));
18956 } else {
18957 vassert(imm8 == 1);
18958 mask = 0x00FF;
18959 assign( withZs, binop(Iop_64HLtoV128, mkexpr(u64), mkU64(0)));
18962 IRTemp res = newTemp(Ity_V128);
18963 assign( res, binop( Iop_OrV128,
18964 mkexpr(withZs),
18965 binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
18966 return res;
18970 static IRTemp math_INSERTPS ( IRTemp dstV, IRTemp toInsertD, UInt imm8 )
18972 const IRTemp inval = IRTemp_INVALID;
18973 IRTemp dstDs[4] = { inval, inval, inval, inval };
18974 breakupV128to32s( dstV, &dstDs[3], &dstDs[2], &dstDs[1], &dstDs[0] );
18976 vassert(imm8 <= 255);
18977 dstDs[(imm8 >> 4) & 3] = toInsertD; /* "imm8_count_d" */
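/* imm8 field layout for INSERTPS: bits 7:6 select the source lane
   (count_s, applied by the register-form caller), bits 5:4 select the
   destination lane (count_d, applied here), and bits 3:0 are zmask,
   which zeroes the corresponding lanes of the result below. */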
18979 UInt imm8_zmask = (imm8 & 15);
18980 IRTemp zero_32 = newTemp(Ity_I32);
18981 assign( zero_32, mkU32(0) );
18982 IRTemp resV = newTemp(Ity_V128);
18983 assign( resV, mkV128from32s(
18984 ((imm8_zmask & 8) == 8) ? zero_32 : dstDs[3],
18985 ((imm8_zmask & 4) == 4) ? zero_32 : dstDs[2],
18986 ((imm8_zmask & 2) == 2) ? zero_32 : dstDs[1],
18987 ((imm8_zmask & 1) == 1) ? zero_32 : dstDs[0]) );
18988 return resV;
18992 static Long dis_PEXTRB_128_GtoE ( const VexAbiInfo* vbi, Prefix pfx,
18993 Long delta, Bool isAvx )
18995 IRTemp addr = IRTemp_INVALID;
18996 Int alen = 0;
18997 HChar dis_buf[50];
18998 IRTemp xmm_vec = newTemp(Ity_V128);
18999 IRTemp sel_lane = newTemp(Ity_I32);
19000 IRTemp shr_lane = newTemp(Ity_I32);
19001 const HChar* mbV = isAvx ? "v" : "";
19002 UChar modrm = getUChar(delta);
19003 IRTemp t3, t2, t1, t0;
19004 Int imm8;
19005 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
19006 t3 = t2 = t1 = t0 = IRTemp_INVALID;
19007 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
19009 if ( epartIsReg( modrm ) ) {
19010 imm8 = (Int)getUChar(delta+1);
19011 } else {
19012 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19013 imm8 = (Int)getUChar(delta+alen);
19015 switch ( (imm8 >> 2) & 3 ) {
19016 case 0: assign( sel_lane, mkexpr(t0) ); break;
19017 case 1: assign( sel_lane, mkexpr(t1) ); break;
19018 case 2: assign( sel_lane, mkexpr(t2) ); break;
19019 case 3: assign( sel_lane, mkexpr(t3) ); break;
19020 default: vassert(0);
19022 assign( shr_lane,
19023 binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) );
19025 if ( epartIsReg( modrm ) ) {
19026 putIReg64( eregOfRexRM(pfx,modrm),
19027 unop( Iop_32Uto64,
19028 binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) );
19029 delta += 1+1;
19030 DIP( "%spextrb $%d, %s,%s\n", mbV, imm8,
19031 nameXMMReg( gregOfRexRM(pfx, modrm) ),
19032 nameIReg64( eregOfRexRM(pfx, modrm) ) );
19033 } else {
19034 storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) );
19035 delta += alen+1;
19036 DIP( "%spextrb $%d,%s,%s\n", mbV,
19037 imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
19040 return delta;
19044 static IRTemp math_DPPD_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
19046 vassert(imm8 < 256);
19047 UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF };
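/* Each imm8_perms entry is a 16-bit byte-lane mask for mkV128, keeping
   neither, the low, the high, or both 64-bit lanes.  Bits 5:4 of imm8
   select which products take part in the sum; bits 1:0 select which
   result lanes receive it. */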
19048 IRTemp and_vec = newTemp(Ity_V128);
19049 IRTemp sum_vec = newTemp(Ity_V128);
19050 IRTemp rm = newTemp(Ity_I32);
19051 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
19052 assign( and_vec, binop( Iop_AndV128,
19053 triop( Iop_Mul64Fx2,
19054 mkexpr(rm),
19055 mkexpr(dst_vec), mkexpr(src_vec) ),
19056 mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) );
19058 assign( sum_vec, binop( Iop_Add64F0x2,
19059 binop( Iop_InterleaveHI64x2,
19060 mkexpr(and_vec), mkexpr(and_vec) ),
19061 binop( Iop_InterleaveLO64x2,
19062 mkexpr(and_vec), mkexpr(and_vec) ) ) );
19063 IRTemp res = newTemp(Ity_V128);
19064 assign(res, binop( Iop_AndV128,
19065 binop( Iop_InterleaveLO64x2,
19066 mkexpr(sum_vec), mkexpr(sum_vec) ),
19067 mkV128( imm8_perms[ (imm8 & 3) ] ) ) );
19068 return res;
19072 static IRTemp math_DPPS_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
19074 vassert(imm8 < 256);
19075 IRTemp tmp_prod_vec = newTemp(Ity_V128);
19076 IRTemp prod_vec = newTemp(Ity_V128);
19077 IRTemp sum_vec = newTemp(Ity_V128);
19078 IRTemp rm = newTemp(Ity_I32);
19079 IRTemp v3, v2, v1, v0;
19080 v3 = v2 = v1 = v0 = IRTemp_INVALID;
19081 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
19082 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
19083 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
19084 0xFFFF };
19086 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
19087 assign( tmp_prod_vec,
19088 binop( Iop_AndV128,
19089 triop( Iop_Mul32Fx4,
19090 mkexpr(rm), mkexpr(dst_vec), mkexpr(src_vec) ),
19091 mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) );
19092 breakupV128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 );
19093 assign( prod_vec, mkV128from32s( v3, v1, v2, v0 ) );
19095 assign( sum_vec, triop( Iop_Add32Fx4,
19096 mkexpr(rm),
19097 binop( Iop_InterleaveHI32x4,
19098 mkexpr(prod_vec), mkexpr(prod_vec) ),
19099 binop( Iop_InterleaveLO32x4,
19100 mkexpr(prod_vec), mkexpr(prod_vec) ) ) );
19102 IRTemp res = newTemp(Ity_V128);
19103 assign( res, binop( Iop_AndV128,
19104 triop( Iop_Add32Fx4,
19105 mkexpr(rm),
19106 binop( Iop_InterleaveHI32x4,
19107 mkexpr(sum_vec), mkexpr(sum_vec) ),
19108 binop( Iop_InterleaveLO32x4,
19109 mkexpr(sum_vec), mkexpr(sum_vec) ) ),
19110 mkV128( imm8_perms[ (imm8 & 15) ] ) ) );
19111 return res;
19115 static IRTemp math_MPSADBW_128 ( IRTemp dst_vec, IRTemp src_vec, UInt imm8 )
19117 /* Mask out bits of the operands we don't need. This isn't
19118 strictly necessary, but it does ensure Memcheck doesn't
19119 give us any false uninitialised value errors as a
19120 result. */
19121 UShort src_mask[4] = { 0x000F, 0x00F0, 0x0F00, 0xF000 };
19122 UShort dst_mask[2] = { 0x07FF, 0x7FF0 };
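/* src_mask keeps just the 4-byte block of src selected by imm8[1:0];
   dst_mask keeps the 11-byte window of dst starting at offset 0 or 4,
   as selected by imm8[2].  Only those bytes contribute to the eight
   sums of absolute differences. */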
19124 IRTemp src_maskV = newTemp(Ity_V128);
19125 IRTemp dst_maskV = newTemp(Ity_V128);
19126 assign(src_maskV, mkV128( src_mask[ imm8 & 3 ] ));
19127 assign(dst_maskV, mkV128( dst_mask[ (imm8 >> 2) & 1 ] ));
19129 IRTemp src_masked = newTemp(Ity_V128);
19130 IRTemp dst_masked = newTemp(Ity_V128);
19131 assign(src_masked, binop(Iop_AndV128, mkexpr(src_vec), mkexpr(src_maskV)));
19132 assign(dst_masked, binop(Iop_AndV128, mkexpr(dst_vec), mkexpr(dst_maskV)));
19134 /* Generate four 64-bit values that we can hand to a clean helper */
19135 IRTemp sHi = newTemp(Ity_I64);
19136 IRTemp sLo = newTemp(Ity_I64);
19137 assign( sHi, unop(Iop_V128HIto64, mkexpr(src_masked)) );
19138 assign( sLo, unop(Iop_V128to64, mkexpr(src_masked)) );
19140 IRTemp dHi = newTemp(Ity_I64);
19141 IRTemp dLo = newTemp(Ity_I64);
19142 assign( dHi, unop(Iop_V128HIto64, mkexpr(dst_masked)) );
19143 assign( dLo, unop(Iop_V128to64, mkexpr(dst_masked)) );
19145 /* Compute halves of the result separately */
19146 IRTemp resHi = newTemp(Ity_I64);
19147 IRTemp resLo = newTemp(Ity_I64);
19149 IRExpr** argsHi
19150 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
19151 mkU64( 0x80 | (imm8 & 7) ));
19152 IRExpr** argsLo
19153 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
19154 mkU64( 0x00 | (imm8 & 7) ));
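/* Both calls pass imm8[2:0] through in the low bits of the final
   argument; the 0x80 bit is taken here to ask the helper for the upper
   four 16-bit results rather than the lower four (an assumption about
   amd64g_calc_mpsadbw's flag convention, inferred from this usage). */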
19156 assign(resHi, mkIRExprCCall( Ity_I64, 0/*regparm*/,
19157 "amd64g_calc_mpsadbw",
19158 &amd64g_calc_mpsadbw, argsHi ));
19159 assign(resLo, mkIRExprCCall( Ity_I64, 0/*regparm*/,
19160 "amd64g_calc_mpsadbw",
19161 &amd64g_calc_mpsadbw, argsLo ));
19163 IRTemp res = newTemp(Ity_V128);
19164 assign(res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo)));
19165 return res;
19168 static Long dis_EXTRACTPS ( const VexAbiInfo* vbi, Prefix pfx,
19169 Long delta, Bool isAvx )
19171 IRTemp addr = IRTemp_INVALID;
19172 Int alen = 0;
19173 HChar dis_buf[50];
19174 UChar modrm = getUChar(delta);
19175 Int imm8_10;
19176 IRTemp xmm_vec = newTemp(Ity_V128);
19177 IRTemp src_dword = newTemp(Ity_I32);
19178 UInt rG = gregOfRexRM(pfx,modrm);
19179 IRTemp t3, t2, t1, t0;
19180 t3 = t2 = t1 = t0 = IRTemp_INVALID;
19182 assign( xmm_vec, getXMMReg( rG ) );
19183 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
19185 if ( epartIsReg( modrm ) ) {
19186 imm8_10 = (Int)(getUChar(delta+1) & 3);
19187 } else {
19188 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19189 imm8_10 = (Int)(getUChar(delta+alen) & 3);
19192 switch ( imm8_10 ) {
19193 case 0: assign( src_dword, mkexpr(t0) ); break;
19194 case 1: assign( src_dword, mkexpr(t1) ); break;
19195 case 2: assign( src_dword, mkexpr(t2) ); break;
19196 case 3: assign( src_dword, mkexpr(t3) ); break;
19197 default: vassert(0);
19200 if ( epartIsReg( modrm ) ) {
19201 UInt rE = eregOfRexRM(pfx,modrm);
19202 putIReg32( rE, mkexpr(src_dword) );
19203 delta += 1+1;
19204 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
19205 nameXMMReg( rG ), nameIReg32( rE ) );
19206 } else {
19207 storeLE( mkexpr(addr), mkexpr(src_dword) );
19208 delta += alen+1;
19209 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
19210 nameXMMReg( rG ), dis_buf );
19213 return delta;
19217 static IRTemp math_PCLMULQDQ( IRTemp dV, IRTemp sV, UInt imm8 )
19219 IRTemp t0 = newTemp(Ity_I64);
19220 IRTemp t1 = newTemp(Ity_I64);
19221 assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64,
19222 mkexpr(dV)));
19223 assign(t1, unop((imm8&16) ? Iop_V128HIto64 : Iop_V128to64,
19224 mkexpr(sV)));
19226 IRTemp t2 = newTemp(Ity_I64);
19227 IRTemp t3 = newTemp(Ity_I64);
19229 IRExpr** args;
19231 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0));
19232 assign(t2, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
19233 &amd64g_calculate_pclmul, args));
19234 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1));
19235 assign(t3, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
19236 &amd64g_calculate_pclmul, args));
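/* The third argument presumably selects which 64-bit half of the
   128-bit carry-less product the helper returns: 0 for the low half
   (t2), 1 for the high half (t3).  The halves are glued back together
   below. */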
19238 IRTemp res = newTemp(Ity_V128);
19239 assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)));
19240 return res;
19244 __attribute__((noinline))
19245 static
19246 Long dis_ESC_0F3A__SSE4 ( Bool* decode_OK,
19247 const VexAbiInfo* vbi,
19248 Prefix pfx, Int sz, Long deltaIN )
19250 IRTemp addr = IRTemp_INVALID;
19251 UChar modrm = 0;
19252 Int alen = 0;
19253 HChar dis_buf[50];
19255 *decode_OK = False;
19257 Long delta = deltaIN;
19258 UChar opc = getUChar(delta);
19259 delta++;
19260 switch (opc) {
19262 case 0x08:
19263 /* 66 0F 3A 08 /r ib = ROUNDPS imm8, xmm2/m128, xmm1 */
19264 if (have66noF2noF3(pfx) && sz == 2) {
19266 IRTemp src0 = newTemp(Ity_F32);
19267 IRTemp src1 = newTemp(Ity_F32);
19268 IRTemp src2 = newTemp(Ity_F32);
19269 IRTemp src3 = newTemp(Ity_F32);
19270 IRTemp res0 = newTemp(Ity_F32);
19271 IRTemp res1 = newTemp(Ity_F32);
19272 IRTemp res2 = newTemp(Ity_F32);
19273 IRTemp res3 = newTemp(Ity_F32);
19274 IRTemp rm = newTemp(Ity_I32);
19275 Int imm = 0;
19277 modrm = getUChar(delta);
19279 if (epartIsReg(modrm)) {
19280 assign( src0,
19281 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
19282 assign( src1,
19283 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 1 ) );
19284 assign( src2,
19285 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 2 ) );
19286 assign( src3,
19287 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 3 ) );
19288 imm = getUChar(delta+1);
19289 if (imm & ~15) goto decode_failure;
19290 delta += 1+1;
19291 DIP( "roundps $%d,%s,%s\n",
19292 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
19293 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19294 } else {
19295 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19296 gen_SEGV_if_not_16_aligned(addr);
19297 assign( src0, loadLE(Ity_F32,
19298 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
19299 assign( src1, loadLE(Ity_F32,
19300 binop(Iop_Add64, mkexpr(addr), mkU64(4) )));
19301 assign( src2, loadLE(Ity_F32,
19302 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
19303 assign( src3, loadLE(Ity_F32,
19304 binop(Iop_Add64, mkexpr(addr), mkU64(12) )));
19305 imm = getUChar(delta+alen);
19306 if (imm & ~15) goto decode_failure;
19307 delta += alen+1;
19308 DIP( "roundps $%d,%s,%s\n",
19309 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19312 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19313 that encoding is the same as the encoding for IRRoundingMode,
19314 we can use that value directly in the IR as a rounding
19315 mode. */
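/* That shared encoding is: 0 = round to nearest even, 1 = round
   towards -infinity, 2 = round towards +infinity, 3 = round towards
   zero.  Bit 2 of imm, tested just below, means "use the rounding
   mode currently in MXCSR instead". */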
19316 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
19318 assign(res0, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src0)) );
19319 assign(res1, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src1)) );
19320 assign(res2, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src2)) );
19321 assign(res3, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src3)) );
19323 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
19324 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
19325 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 2, mkexpr(res2) );
19326 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 3, mkexpr(res3) );
19328 goto decode_success;
19330 break;
19332 case 0x09:
19333 /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */
19334 if (have66noF2noF3(pfx) && sz == 2) {
19336 IRTemp src0 = newTemp(Ity_F64);
19337 IRTemp src1 = newTemp(Ity_F64);
19338 IRTemp res0 = newTemp(Ity_F64);
19339 IRTemp res1 = newTemp(Ity_F64);
19340 IRTemp rm = newTemp(Ity_I32);
19341 Int imm = 0;
19343 modrm = getUChar(delta);
19345 if (epartIsReg(modrm)) {
19346 assign( src0,
19347 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) );
19348 assign( src1,
19349 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 1 ) );
19350 imm = getUChar(delta+1);
19351 if (imm & ~15) goto decode_failure;
19352 delta += 1+1;
19353 DIP( "roundpd $%d,%s,%s\n",
19354 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
19355 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19356 } else {
19357 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19358 gen_SEGV_if_not_16_aligned(addr);
19359 assign( src0, loadLE(Ity_F64,
19360 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
19361 assign( src1, loadLE(Ity_F64,
19362 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
19363 imm = getUChar(delta+alen);
19364 if (imm & ~15) goto decode_failure;
19365 delta += alen+1;
19366 DIP( "roundpd $%d,%s,%s\n",
19367 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19370 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19371 that encoding is the same as the encoding for IRRoundingMode,
19372 we can use that value directly in the IR as a rounding
19373 mode. */
19374 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
19376 assign(res0, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src0)) );
19377 assign(res1, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src1)) );
19379 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
19380 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
19382 goto decode_success;
19384 break;
19386 case 0x0A:
19387 case 0x0B:
19388 /* 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
19389 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1 */
19391 if (have66noF2noF3(pfx) && sz == 2) {
19393 Bool isD = opc == 0x0B;
19394 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
19395 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
19396 Int imm = 0;
19398 modrm = getUChar(delta);
19400 if (epartIsReg(modrm)) {
19401 assign( src,
19402 isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 )
19403 : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
19404 imm = getUChar(delta+1);
19405 if (imm & ~15) goto decode_failure;
19406 delta += 1+1;
19407 DIP( "rounds%c $%d,%s,%s\n",
19408 isD ? 'd' : 's',
19409 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
19410 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19411 } else {
19412 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19413 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
19414 imm = getUChar(delta+alen);
19415 if (imm & ~15) goto decode_failure;
19416 delta += alen+1;
19417 DIP( "rounds%c $%d,%s,%s\n",
19418 isD ? 'd' : 's',
19419 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19422 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19423 that encoding is the same as the encoding for IRRoundingMode,
19424 we can use that value directly in the IR as a rounding
19425 mode. */
19426 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
19427 (imm & 4) ? get_sse_roundingmode()
19428 : mkU32(imm & 3),
19429 mkexpr(src)) );
19431 if (isD)
19432 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
19433 else
19434 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
19436 goto decode_success;
19438 break;
19440 case 0x0C:
19441 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8
19442 Blend Packed Single Precision Floating-Point Values (XMM) */
19443 if (have66noF2noF3(pfx) && sz == 2) {
19445 Int imm8;
19446 IRTemp dst_vec = newTemp(Ity_V128);
19447 IRTemp src_vec = newTemp(Ity_V128);
19449 modrm = getUChar(delta);
19451 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
19453 if ( epartIsReg( modrm ) ) {
19454 imm8 = (Int)getUChar(delta+1);
19455 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
19456 delta += 1+1;
19457 DIP( "blendps $%d, %s,%s\n", imm8,
19458 nameXMMReg( eregOfRexRM(pfx, modrm) ),
19459 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19460 } else {
19461 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19462 1/* imm8 is 1 byte after the amode */ );
19463 gen_SEGV_if_not_16_aligned( addr );
19464 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19465 imm8 = (Int)getUChar(delta+alen);
19466 delta += alen+1;
19467 DIP( "blendpd $%d, %s,%s\n",
19468 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19471 putXMMReg( gregOfRexRM(pfx, modrm),
19472 mkexpr( math_BLENDPS_128( src_vec, dst_vec, imm8) ) );
19473 goto decode_success;
19475 break;
19477 case 0x0D:
19478 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8
19479 Blend Packed Double Precision Floating-Point Values (XMM) */
19480 if (have66noF2noF3(pfx) && sz == 2) {
19482 Int imm8;
19483 IRTemp dst_vec = newTemp(Ity_V128);
19484 IRTemp src_vec = newTemp(Ity_V128);
19486 modrm = getUChar(delta);
19487 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
19489 if ( epartIsReg( modrm ) ) {
19490 imm8 = (Int)getUChar(delta+1);
19491 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
19492 delta += 1+1;
19493 DIP( "blendpd $%d, %s,%s\n", imm8,
19494 nameXMMReg( eregOfRexRM(pfx, modrm) ),
19495 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19496 } else {
19497 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19498 1/* imm8 is 1 byte after the amode */ );
19499 gen_SEGV_if_not_16_aligned( addr );
19500 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19501 imm8 = (Int)getUChar(delta+alen);
19502 delta += alen+1;
19503 DIP( "blendpd $%d, %s,%s\n",
19504 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19507 putXMMReg( gregOfRexRM(pfx, modrm),
19508 mkexpr( math_BLENDPD_128( src_vec, dst_vec, imm8) ) );
19509 goto decode_success;
19511 break;
19513 case 0x0E:
19514 /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8
19515 Blend Packed Words (XMM) */
19516 if (have66noF2noF3(pfx) && sz == 2) {
19518 Int imm8;
19519 IRTemp dst_vec = newTemp(Ity_V128);
19520 IRTemp src_vec = newTemp(Ity_V128);
19522 modrm = getUChar(delta);
19524 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
19526 if ( epartIsReg( modrm ) ) {
19527 imm8 = (Int)getUChar(delta+1);
19528 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
19529 delta += 1+1;
19530 DIP( "pblendw $%d, %s,%s\n", imm8,
19531 nameXMMReg( eregOfRexRM(pfx, modrm) ),
19532 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19533 } else {
19534 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19535 1/* imm8 is 1 byte after the amode */ );
19536 gen_SEGV_if_not_16_aligned( addr );
19537 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19538 imm8 = (Int)getUChar(delta+alen);
19539 delta += alen+1;
19540 DIP( "pblendw $%d, %s,%s\n",
19541 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19544 putXMMReg( gregOfRexRM(pfx, modrm),
19545 mkexpr( math_PBLENDW_128( src_vec, dst_vec, imm8) ) );
19546 goto decode_success;
19548 break;
19550 case 0x14:
19551 /* 66 0F 3A 14 /r ib = PEXTRB r/m8, xmm, imm8
19552 Extract Byte from xmm, store in mem or zero-extend + store in gen.reg.
19553 (XMM) */
19554 if (have66noF2noF3(pfx) && sz == 2) {
19555 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
19556 goto decode_success;
19558 break;
19560 case 0x15:
19561 /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8
19562 Extract Word from xmm, store in mem or zero-extend + store in gen.reg.
19563 (XMM) */
19564 if (have66noF2noF3(pfx) && sz == 2) {
19565 delta = dis_PEXTRW( vbi, pfx, delta, False/*!isAvx*/ );
19566 goto decode_success;
19568 break;
19570 case 0x16:
19571 /* 66 no-REX.W 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8
19572 Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM)
19573 Note that this insn has the same opcodes as PEXTRQ, but
19574 here the REX.W bit is _not_ present */
19575 if (have66noF2noF3(pfx)
19576 && sz == 2 /* REX.W is _not_ present */) {
19577 delta = dis_PEXTRD( vbi, pfx, delta, False/*!isAvx*/ );
19578 goto decode_success;
19580 /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8
19581 Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM)
19582 Note that this insn has the same opcodes as PEXTRD, but
19583 here the REX.W bit is present */
19584 if (have66noF2noF3(pfx)
19585 && sz == 8 /* REX.W is present */) {
19586 delta = dis_PEXTRQ( vbi, pfx, delta, False/*!isAvx*/);
19587 goto decode_success;
19589 break;
19591 case 0x17:
19592 /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract
19593 float from xmm reg and store in gen.reg or mem. This is
19594 identical to PEXTRD, except that REX.W appears to be ignored. */
19596 if (have66noF2noF3(pfx)
19597 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
19598 delta = dis_EXTRACTPS( vbi, pfx, delta, False/*!isAvx*/ );
19599 goto decode_success;
19601 break;
19603 case 0x20:
19604 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8
19605 Extract byte from r32/m8 and insert into xmm1 */
19606 if (have66noF2noF3(pfx) && sz == 2) {
19607 Int imm8;
19608 IRTemp new8 = newTemp(Ity_I8);
19609 modrm = getUChar(delta);
19610 UInt rG = gregOfRexRM(pfx, modrm);
19611 if ( epartIsReg( modrm ) ) {
19612 UInt rE = eregOfRexRM(pfx,modrm);
19613 imm8 = (Int)(getUChar(delta+1) & 0xF);
19614 assign( new8, unop(Iop_32to8, getIReg32(rE)) );
19615 delta += 1+1;
19616 DIP( "pinsrb $%d,%s,%s\n", imm8,
19617 nameIReg32(rE), nameXMMReg(rG) );
19618 } else {
19619 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19620 imm8 = (Int)(getUChar(delta+alen) & 0xF);
19621 assign( new8, loadLE( Ity_I8, mkexpr(addr) ) );
19622 delta += alen+1;
19623 DIP( "pinsrb $%d,%s,%s\n",
19624 imm8, dis_buf, nameXMMReg(rG) );
19626 IRTemp src_vec = newTemp(Ity_V128);
19627 assign(src_vec, getXMMReg( gregOfRexRM(pfx, modrm) ));
19628 IRTemp res = math_PINSRB_128( src_vec, new8, imm8 );
19629 putXMMReg( rG, mkexpr(res) );
19630 goto decode_success;
19632 break;
19634 case 0x21:
19635 /* 66 0F 3A 21 /r ib = INSERTPS imm8, xmm2/m32, xmm1
19636 Insert Packed Single Precision Floating-Point Value (XMM) */
19637 if (have66noF2noF3(pfx) && sz == 2) {
19638 UInt imm8;
19639 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */
19640 const IRTemp inval = IRTemp_INVALID;
19642 modrm = getUChar(delta);
19643 UInt rG = gregOfRexRM(pfx, modrm);
19645 if ( epartIsReg( modrm ) ) {
19646 UInt rE = eregOfRexRM(pfx, modrm);
19647 IRTemp vE = newTemp(Ity_V128);
19648 assign( vE, getXMMReg(rE) );
19649 IRTemp dsE[4] = { inval, inval, inval, inval };
19650 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] );
19651 imm8 = getUChar(delta+1);
19652 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */
19653 delta += 1+1;
19654 DIP( "insertps $%u, %s,%s\n",
19655 imm8, nameXMMReg(rE), nameXMMReg(rG) );
19656 } else {
19657 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19658 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) );
19659 imm8 = getUChar(delta+alen);
19660 delta += alen+1;
19661 DIP( "insertps $%u, %s,%s\n",
19662 imm8, dis_buf, nameXMMReg(rG) );
19665 IRTemp vG = newTemp(Ity_V128);
19666 assign( vG, getXMMReg(rG) );
19668 putXMMReg( rG, mkexpr(math_INSERTPS( vG, d2ins, imm8 )) );
19669 goto decode_success;
19671 break;
19673 case 0x22:
19674 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8
19675 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */
19676 if (have66noF2noF3(pfx)
19677 && sz == 2 /* REX.W is NOT present */) {
19678 Int imm8_10;
19679 IRTemp src_u32 = newTemp(Ity_I32);
19680 modrm = getUChar(delta);
19681 UInt rG = gregOfRexRM(pfx, modrm);
19683 if ( epartIsReg( modrm ) ) {
19684 UInt rE = eregOfRexRM(pfx,modrm);
19685 imm8_10 = (Int)(getUChar(delta+1) & 3);
19686 assign( src_u32, getIReg32( rE ) );
19687 delta += 1+1;
19688 DIP( "pinsrd $%d, %s,%s\n",
19689 imm8_10, nameIReg32(rE), nameXMMReg(rG) );
19690 } else {
19691 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19692 imm8_10 = (Int)(getUChar(delta+alen) & 3);
19693 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) );
19694 delta += alen+1;
19695 DIP( "pinsrd $%d, %s,%s\n",
19696 imm8_10, dis_buf, nameXMMReg(rG) );
19699 IRTemp src_vec = newTemp(Ity_V128);
19700 assign(src_vec, getXMMReg( rG ));
19701 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 );
19702 putXMMReg( rG, mkexpr(res_vec) );
19703 goto decode_success;
19705 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8
19706 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */
19707 if (have66noF2noF3(pfx)
19708 && sz == 8 /* REX.W is present */) {
19709 Int imm8_0;
19710 IRTemp src_u64 = newTemp(Ity_I64);
19711 modrm = getUChar(delta);
19712 UInt rG = gregOfRexRM(pfx, modrm);
19714 if ( epartIsReg( modrm ) ) {
19715 UInt rE = eregOfRexRM(pfx,modrm);
19716 imm8_0 = (Int)(getUChar(delta+1) & 1);
19717 assign( src_u64, getIReg64( rE ) );
19718 delta += 1+1;
19719 DIP( "pinsrq $%d, %s,%s\n",
19720 imm8_0, nameIReg64(rE), nameXMMReg(rG) );
19721 } else {
19722 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19723 imm8_0 = (Int)(getUChar(delta+alen) & 1);
19724 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) );
19725 delta += alen+1;
19726 DIP( "pinsrq $%d, %s,%s\n",
19727 imm8_0, dis_buf, nameXMMReg(rG) );
19730 IRTemp src_vec = newTemp(Ity_V128);
19731 assign(src_vec, getXMMReg( rG ));
19732 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 );
19733 putXMMReg( rG, mkexpr(res_vec) );
19734 goto decode_success;
19736 break;
19738 case 0x40:
19739 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8
19740 Dot Product of Packed Single Precision Floating-Point Values (XMM) */
19741 if (have66noF2noF3(pfx) && sz == 2) {
19742 modrm = getUChar(delta);
19743 Int imm8;
19744 IRTemp src_vec = newTemp(Ity_V128);
19745 IRTemp dst_vec = newTemp(Ity_V128);
19746 UInt rG = gregOfRexRM(pfx, modrm);
19747 assign( dst_vec, getXMMReg( rG ) );
19748 if ( epartIsReg( modrm ) ) {
19749 UInt rE = eregOfRexRM(pfx, modrm);
19750 imm8 = (Int)getUChar(delta+1);
19751 assign( src_vec, getXMMReg(rE) );
19752 delta += 1+1;
19753 DIP( "dpps $%d, %s,%s\n",
19754 imm8, nameXMMReg(rE), nameXMMReg(rG) );
19755 } else {
19756 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19757 1/* imm8 is 1 byte after the amode */ );
19758 gen_SEGV_if_not_16_aligned( addr );
19759 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19760 imm8 = (Int)getUChar(delta+alen);
19761 delta += alen+1;
19762 DIP( "dpps $%d, %s,%s\n",
19763 imm8, dis_buf, nameXMMReg(rG) );
19765 IRTemp res = math_DPPS_128( src_vec, dst_vec, imm8 );
19766 putXMMReg( rG, mkexpr(res) );
19767 goto decode_success;
19769 break;
19771 case 0x41:
19772 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8
19773 Dot Product of Packed Double Precision Floating-Point Values (XMM) */
19774 if (have66noF2noF3(pfx) && sz == 2) {
19775 modrm = getUChar(delta);
19776 Int imm8;
19777 IRTemp src_vec = newTemp(Ity_V128);
19778 IRTemp dst_vec = newTemp(Ity_V128);
19779 UInt rG = gregOfRexRM(pfx, modrm);
19780 assign( dst_vec, getXMMReg( rG ) );
19781 if ( epartIsReg( modrm ) ) {
19782 UInt rE = eregOfRexRM(pfx, modrm);
19783 imm8 = (Int)getUChar(delta+1);
19784 assign( src_vec, getXMMReg(rE) );
19785 delta += 1+1;
19786 DIP( "dppd $%d, %s,%s\n",
19787 imm8, nameXMMReg(rE), nameXMMReg(rG) );
19788 } else {
19789 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19790 1/* imm8 is 1 byte after the amode */ );
19791 gen_SEGV_if_not_16_aligned( addr );
19792 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19793 imm8 = (Int)getUChar(delta+alen);
19794 delta += alen+1;
19795 DIP( "dppd $%d, %s,%s\n",
19796 imm8, dis_buf, nameXMMReg(rG) );
19798 IRTemp res = math_DPPD_128( src_vec, dst_vec, imm8 );
19799 putXMMReg( rG, mkexpr(res) );
19800 goto decode_success;
19802 break;
19804 case 0x42:
19805 /* 66 0F 3A 42 /r ib = MPSADBW xmm1, xmm2/m128, imm8
19806 Multiple Packed Sums of Absolute Difference (XMM) */
19807 if (have66noF2noF3(pfx) && sz == 2) {
19808 Int imm8;
19809 IRTemp src_vec = newTemp(Ity_V128);
19810 IRTemp dst_vec = newTemp(Ity_V128);
19811 modrm = getUChar(delta);
19812 UInt rG = gregOfRexRM(pfx, modrm);
19814 assign( dst_vec, getXMMReg(rG) );
19816 if ( epartIsReg( modrm ) ) {
19817 UInt rE = eregOfRexRM(pfx, modrm);
19819 imm8 = (Int)getUChar(delta+1);
19820 assign( src_vec, getXMMReg(rE) );
19821 delta += 1+1;
19822 DIP( "mpsadbw $%d, %s,%s\n", imm8,
19823 nameXMMReg(rE), nameXMMReg(rG) );
19824 } else {
19825 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19826 1/* imm8 is 1 byte after the amode */ );
19827 gen_SEGV_if_not_16_aligned( addr );
19828 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19829 imm8 = (Int)getUChar(delta+alen);
19830 delta += alen+1;
19831 DIP( "mpsadbw $%d, %s,%s\n", imm8, dis_buf, nameXMMReg(rG) );
19834 putXMMReg( rG, mkexpr( math_MPSADBW_128(dst_vec, src_vec, imm8) ) );
19835 goto decode_success;
19837 break;
19839 case 0x44:
19840 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
19841 * Carry-less multiplication of selected XMM quadwords into XMM
19842 * registers (a.k.a. multiplication of polynomials over GF(2)) */
19844 if (have66noF2noF3(pfx) && sz == 2) {
19846 Int imm8;
19847 IRTemp svec = newTemp(Ity_V128);
19848 IRTemp dvec = newTemp(Ity_V128);
19849 modrm = getUChar(delta);
19850 UInt rG = gregOfRexRM(pfx, modrm);
19852 assign( dvec, getXMMReg(rG) );
19854 if ( epartIsReg( modrm ) ) {
19855 UInt rE = eregOfRexRM(pfx, modrm);
19856 imm8 = (Int)getUChar(delta+1);
19857 assign( svec, getXMMReg(rE) );
19858 delta += 1+1;
19859 DIP( "pclmulqdq $%d, %s,%s\n", imm8,
19860 nameXMMReg(rE), nameXMMReg(rG) );
19861 } else {
19862 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19863 1/* imm8 is 1 byte after the amode */ );
19864 gen_SEGV_if_not_16_aligned( addr );
19865 assign( svec, loadLE( Ity_V128, mkexpr(addr) ) );
19866 imm8 = (Int)getUChar(delta+alen);
19867 delta += alen+1;
19868 DIP( "pclmulqdq $%d, %s,%s\n",
19869 imm8, dis_buf, nameXMMReg(rG) );
19872 putXMMReg( rG, mkexpr( math_PCLMULQDQ(dvec, svec, imm8) ) );
19873 goto decode_success;
19875 break;
19877 case 0x60:
19878 case 0x61:
19879 case 0x62:
19880 case 0x63:
19881 /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1
19882 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1
19883 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1
19884 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1
19885 (selected special cases that actually occur in glibc,
19886 not by any means a complete implementation.) */
19888 if (have66noF2noF3(pfx) && sz == 2) {
19889 Long delta0 = delta;
19890 delta = dis_PCMPxSTRx( vbi, pfx, delta, False/*!isAvx*/, opc );
19891 if (delta > delta0) goto decode_success;
19892 /* else fall through; dis_PCMPxSTRx failed to decode it */
19894 break;
19896 case 0xDF:
19897 /* 66 0F 3A DF /r ib = AESKEYGENASSIST imm8, xmm2/m128, xmm1 */
19898 if (have66noF2noF3(pfx) && sz == 2) {
19899 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, False/*!isAvx*/ );
19900 goto decode_success;
19902 break;
19904 default:
19905 break;
19909 decode_failure:
19910 *decode_OK = False;
19911 return deltaIN;
19913 decode_success:
19914 *decode_OK = True;
19915 return delta;
19919 /*------------------------------------------------------------*/
19920 /*--- ---*/
19921 /*--- Top-level post-escape decoders: dis_ESC_NONE ---*/
19922 /*--- ---*/
19923 /*------------------------------------------------------------*/
19925 __attribute__((noinline))
19926 static
19927 Long dis_ESC_NONE (
19928 /*MB_OUT*/DisResult* dres,
19929 /*MB_OUT*/Bool* expect_CAS,
19930 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
19931 Bool resteerCisOk,
19932 void* callback_opaque,
19933 const VexArchInfo* archinfo,
19934 const VexAbiInfo* vbi,
19935 Prefix pfx, Int sz, Long deltaIN
19938 Long d64 = 0;
19939 UChar abyte = 0;
19940 IRTemp addr = IRTemp_INVALID;
19941 IRTemp t1 = IRTemp_INVALID;
19942 IRTemp t2 = IRTemp_INVALID;
19943 IRTemp t3 = IRTemp_INVALID;
19944 IRTemp t4 = IRTemp_INVALID;
19945 IRTemp t5 = IRTemp_INVALID;
19946 IRType ty = Ity_INVALID;
19947 UChar modrm = 0;
19948 Int am_sz = 0;
19949 Int d_sz = 0;
19950 Int alen = 0;
19951 HChar dis_buf[50];
19953 Long delta = deltaIN;
19954 UChar opc = getUChar(delta); delta++;
19956 /* delta now points at the modrm byte. In most of the cases that
19957 follow, neither the F2 nor F3 prefixes are allowed. However,
19958 for some basic arithmetic operations we have to allow F2/XACQ or
19959 F3/XREL in the case where the destination is memory and the LOCK
19960 prefix is also present. Do this check by looking at the modrm
19961 byte but not advancing delta over it. */
19962 /* By default, F2 and F3 are not allowed, so let's start off with
19963 that setting. */
19964 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
19965 { UChar tmp_modrm = getUChar(delta);
19966 switch (opc) {
19967 case 0x00: /* ADD Gb,Eb */ case 0x01: /* ADD Gv,Ev */
19968 case 0x08: /* OR Gb,Eb */ case 0x09: /* OR Gv,Ev */
19969 case 0x10: /* ADC Gb,Eb */ case 0x11: /* ADC Gv,Ev */
19970 case 0x18: /* SBB Gb,Eb */ case 0x19: /* SBB Gv,Ev */
19971 case 0x20: /* AND Gb,Eb */ case 0x21: /* AND Gv,Ev */
19972 case 0x28: /* SUB Gb,Eb */ case 0x29: /* SUB Gv,Ev */
19973 case 0x30: /* XOR Gb,Eb */ case 0x31: /* XOR Gv,Ev */
19974 if (!epartIsReg(tmp_modrm)
19975 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
19976 /* dst is mem, and we have F2 or F3 but not both */
19977 validF2orF3 = True;
19979 break;
19980 default:
19981 break;
19985 /* Now, in the switch below, for the opc values examined by the
19986 switch above, use validF2orF3 rather than looking at pfx
19987 directly. */
19988 switch (opc) {
19990 case 0x00: /* ADD Gb,Eb */
19991 if (!validF2orF3) goto decode_failure;
19992 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagNone, True, 1, delta, "add" );
19993 return delta;
19994 case 0x01: /* ADD Gv,Ev */
19995 if (!validF2orF3) goto decode_failure;
19996 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagNone, True, sz, delta, "add" );
19997 return delta;
19999 case 0x02: /* ADD Eb,Gb */
20000 if (haveF2orF3(pfx)) goto decode_failure;
20001 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagNone, True, 1, delta, "add" );
20002 return delta;
20003 case 0x03: /* ADD Ev,Gv */
20004 if (haveF2orF3(pfx)) goto decode_failure;
20005 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagNone, True, sz, delta, "add" );
20006 return delta;
20008 case 0x04: /* ADD Ib, AL */
20009 if (haveF2orF3(pfx)) goto decode_failure;
20010 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
20011 return delta;
20012 case 0x05: /* ADD Iv, eAX */
20013 if (haveF2orF3(pfx)) goto decode_failure;
20014 delta = dis_op_imm_A(sz, False, Iop_Add8, True, delta, "add" );
20015 return delta;
20017 case 0x08: /* OR Gb,Eb */
20018 if (!validF2orF3) goto decode_failure;
20019 delta = dis_op2_G_E ( vbi, pfx, Iop_Or8, WithFlagNone, True, 1, delta, "or" );
20020 return delta;
20021 case 0x09: /* OR Gv,Ev */
20022 if (!validF2orF3) goto decode_failure;
20023 delta = dis_op2_G_E ( vbi, pfx, Iop_Or8, WithFlagNone, True, sz, delta, "or" );
20024 return delta;
20026 case 0x0A: /* OR Eb,Gb */
20027 if (haveF2orF3(pfx)) goto decode_failure;
20028 delta = dis_op2_E_G ( vbi, pfx, Iop_Or8, WithFlagNone, True, 1, delta, "or" );
20029 return delta;
20030 case 0x0B: /* OR Ev,Gv */
20031 if (haveF2orF3(pfx)) goto decode_failure;
20032 delta = dis_op2_E_G ( vbi, pfx, Iop_Or8, WithFlagNone, True, sz, delta, "or" );
20033 return delta;
20035 case 0x0C: /* OR Ib, AL */
20036 if (haveF2orF3(pfx)) goto decode_failure;
20037 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
20038 return delta;
20039 case 0x0D: /* OR Iv, eAX */
20040 if (haveF2orF3(pfx)) goto decode_failure;
20041 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
20042 return delta;
20044 case 0x10: /* ADC Gb,Eb */
20045 if (!validF2orF3) goto decode_failure;
20046 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagCarry, True, 1, delta, "adc" );
20047 return delta;
20048 case 0x11: /* ADC Gv,Ev */
20049 if (!validF2orF3) goto decode_failure;
20050 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagCarry, True, sz, delta, "adc" );
20051 return delta;
20053 case 0x12: /* ADC Eb,Gb */
20054 if (haveF2orF3(pfx)) goto decode_failure;
20055 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagCarry, True, 1, delta, "adc" );
20056 return delta;
20057 case 0x13: /* ADC Ev,Gv */
20058 if (haveF2orF3(pfx)) goto decode_failure;
20059 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagCarry, True, sz, delta, "adc" );
20060 return delta;
20062 case 0x14: /* ADC Ib, AL */
20063 if (haveF2orF3(pfx)) goto decode_failure;
20064 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
20065 return delta;
20066 case 0x15: /* ADC Iv, eAX */
20067 if (haveF2orF3(pfx)) goto decode_failure;
20068 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
20069 return delta;
20071 case 0x18: /* SBB Gb,Eb */
20072 if (!validF2orF3) goto decode_failure;
20073 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, 1, delta, "sbb" );
20074 return delta;
20075 case 0x19: /* SBB Gv,Ev */
20076 if (!validF2orF3) goto decode_failure;
20077 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, sz, delta, "sbb" );
20078 return delta;
20080 case 0x1A: /* SBB Eb,Gb */
20081 if (haveF2orF3(pfx)) goto decode_failure;
20082 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, 1, delta, "sbb" );
20083 return delta;
20084 case 0x1B: /* SBB Ev,Gv */
20085 if (haveF2orF3(pfx)) goto decode_failure;
20086 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, sz, delta, "sbb" );
20087 return delta;
20089 case 0x1C: /* SBB Ib, AL */
20090 if (haveF2orF3(pfx)) goto decode_failure;
20091 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
20092 return delta;
20093 case 0x1D: /* SBB Iv, eAX */
20094 if (haveF2orF3(pfx)) goto decode_failure;
20095 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
20096 return delta;
20098 case 0x20: /* AND Gb,Eb */
20099 if (!validF2orF3) goto decode_failure;
20100 delta = dis_op2_G_E ( vbi, pfx, Iop_And8, WithFlagNone, True, 1, delta, "and" );
20101 return delta;
20102 case 0x21: /* AND Gv,Ev */
20103 if (!validF2orF3) goto decode_failure;
20104 delta = dis_op2_G_E ( vbi, pfx, Iop_And8, WithFlagNone, True, sz, delta, "and" );
20105 return delta;
20107 case 0x22: /* AND Eb,Gb */
20108 if (haveF2orF3(pfx)) goto decode_failure;
20109 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, True, 1, delta, "and" );
20110 return delta;
20111 case 0x23: /* AND Ev,Gv */
20112 if (haveF2orF3(pfx)) goto decode_failure;
20113 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, True, sz, delta, "and" );
20114 return delta;
20116 case 0x24: /* AND Ib, AL */
20117 if (haveF2orF3(pfx)) goto decode_failure;
20118 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
20119 return delta;
20120 case 0x25: /* AND Iv, eAX */
20121 if (haveF2orF3(pfx)) goto decode_failure;
20122 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
20123 return delta;
20125 case 0x28: /* SUB Gb,Eb */
20126 if (!validF2orF3) goto decode_failure;
20127 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, True, 1, delta, "sub" );
20128 return delta;
20129 case 0x29: /* SUB Gv,Ev */
20130 if (!validF2orF3) goto decode_failure;
20131 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, True, sz, delta, "sub" );
20132 return delta;
20134 case 0x2A: /* SUB Eb,Gb */
20135 if (haveF2orF3(pfx)) goto decode_failure;
20136 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, True, 1, delta, "sub" );
20137 return delta;
20138 case 0x2B: /* SUB Ev,Gv */
20139 if (haveF2orF3(pfx)) goto decode_failure;
20140 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, True, sz, delta, "sub" );
20141 return delta;
20143 case 0x2C: /* SUB Ib, AL */
20144 if (haveF2orF3(pfx)) goto decode_failure;
20145 delta = dis_op_imm_A(1, False, Iop_Sub8, True, delta, "sub" );
20146 return delta;
20147 case 0x2D: /* SUB Iv, eAX */
20148 if (haveF2orF3(pfx)) goto decode_failure;
20149 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
20150 return delta;
20152 case 0x30: /* XOR Gb,Eb */
20153 if (!validF2orF3) goto decode_failure;
20154 delta = dis_op2_G_E ( vbi, pfx, Iop_Xor8, WithFlagNone, True, 1, delta, "xor" );
20155 return delta;
20156 case 0x31: /* XOR Gv,Ev */
20157 if (!validF2orF3) goto decode_failure;
20158 delta = dis_op2_G_E ( vbi, pfx, Iop_Xor8, WithFlagNone, True, sz, delta, "xor" );
20159 return delta;
20161 case 0x32: /* XOR Eb,Gb */
20162 if (haveF2orF3(pfx)) goto decode_failure;
20163 delta = dis_op2_E_G ( vbi, pfx, Iop_Xor8, WithFlagNone, True, 1, delta, "xor" );
20164 return delta;
20165 case 0x33: /* XOR Ev,Gv */
20166 if (haveF2orF3(pfx)) goto decode_failure;
20167 delta = dis_op2_E_G ( vbi, pfx, Iop_Xor8, WithFlagNone, True, sz, delta, "xor" );
20168 return delta;
20170 case 0x34: /* XOR Ib, AL */
20171 if (haveF2orF3(pfx)) goto decode_failure;
20172 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
20173 return delta;
20174 case 0x35: /* XOR Iv, eAX */
20175 if (haveF2orF3(pfx)) goto decode_failure;
20176 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
20177 return delta;
20179 case 0x38: /* CMP Gb,Eb */
20180 if (haveF2orF3(pfx)) goto decode_failure;
20181 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, False, 1, delta, "cmp" );
20182 return delta;
20183 case 0x39: /* CMP Gv,Ev */
20184 if (haveF2orF3(pfx)) goto decode_failure;
20185 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, False, sz, delta, "cmp" );
20186 return delta;
20188 case 0x3A: /* CMP Eb,Gb */
20189 if (haveF2orF3(pfx)) goto decode_failure;
20190 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, False, 1, delta, "cmp" );
20191 return delta;
20192 case 0x3B: /* CMP Ev,Gv */
20193 if (haveF2orF3(pfx)) goto decode_failure;
20194 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, False, sz, delta, "cmp" );
20195 return delta;
20197 case 0x3C: /* CMP Ib, AL */
20198 if (haveF2orF3(pfx)) goto decode_failure;
20199 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
20200 return delta;
20201 case 0x3D: /* CMP Iv, eAX */
20202 if (haveF2orF3(pfx)) goto decode_failure;
20203 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
20204 return delta;
20206 case 0x50: /* PUSH eAX */
20207 case 0x51: /* PUSH eCX */
20208 case 0x52: /* PUSH eDX */
20209 case 0x53: /* PUSH eBX */
20210 case 0x55: /* PUSH eBP */
20211 case 0x56: /* PUSH eSI */
20212 case 0x57: /* PUSH eDI */
20213 case 0x54: /* PUSH eSP */
20214 /* This is the Right Way, in that the value to be pushed is
20215 established before %rsp is changed, so that pushq %rsp
20216 correctly pushes the old value. */
20217 if (haveF2orF3(pfx)) goto decode_failure;
20218 vassert(sz == 2 || sz == 4 || sz == 8);
20219 if (sz == 4)
20220 sz = 8; /* there is no encoding for 32-bit push in 64-bit mode */
20221 ty = sz==2 ? Ity_I16 : Ity_I64;
20222 t1 = newTemp(ty);
20223 t2 = newTemp(Ity_I64);
20224 assign(t1, getIRegRexB(sz, pfx, opc-0x50));
20225 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz)));
20226 putIReg64(R_RSP, mkexpr(t2) );
20227 storeLE(mkexpr(t2),mkexpr(t1));
20228 DIP("push%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x50));
20229 return delta;
20231 case 0x58: /* POP eAX */
20232 case 0x59: /* POP eCX */
20233 case 0x5A: /* POP eDX */
20234 case 0x5B: /* POP eBX */
20235 case 0x5D: /* POP eBP */
20236 case 0x5E: /* POP eSI */
20237 case 0x5F: /* POP eDI */
20238 case 0x5C: /* POP eSP */
20239 if (haveF2orF3(pfx)) goto decode_failure;
20240 vassert(sz == 2 || sz == 4 || sz == 8);
20241 if (sz == 4)
20242 sz = 8; /* there is no encoding for 32-bit pop in 64-bit mode */
20243 t1 = newTemp(szToITy(sz));
20244 t2 = newTemp(Ity_I64);
20245 assign(t2, getIReg64(R_RSP));
20246 assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
20247 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
20248 putIRegRexB(sz, pfx, opc-0x58, mkexpr(t1));
20249 DIP("pop%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x58));
20250 return delta;
20252 case 0x63: /* MOVSXD */
20253 if (haveF2orF3(pfx)) goto decode_failure;
20254 if (haveREX(pfx) && 1==getRexW(pfx)) {
20255 vassert(sz == 8);
20256 /* movsx r/m32 to r64 */
20257 modrm = getUChar(delta);
20258 if (epartIsReg(modrm)) {
20259 delta++;
20260 putIRegG(8, pfx, modrm,
20261 unop(Iop_32Sto64,
20262 getIRegE(4, pfx, modrm)));
20263 DIP("movslq %s,%s\n",
20264 nameIRegE(4, pfx, modrm),
20265 nameIRegG(8, pfx, modrm));
20266 return delta;
20267 } else {
20268 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
20269 delta += alen;
20270 putIRegG(8, pfx, modrm,
20271 unop(Iop_32Sto64,
20272 loadLE(Ity_I32, mkexpr(addr))));
20273 DIP("movslq %s,%s\n", dis_buf,
20274 nameIRegG(8, pfx, modrm));
20275 return delta;
20277 } else {
20278 goto decode_failure;
20281 case 0x68: /* PUSH Iv */
20282 if (haveF2orF3(pfx)) goto decode_failure;
20283       /* Note, a 32-bit push (sz==4) is not encodable in 64-bit mode.  Hence ... */
20284 if (sz == 4) sz = 8;
20285 d64 = getSDisp(imin(4,sz),delta);
20286 delta += imin(4,sz);
20287 goto do_push_I;
20289 case 0x69: /* IMUL Iv, Ev, Gv */
20290 if (haveF2orF3(pfx)) goto decode_failure;
20291 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, sz );
20292 return delta;
20294 case 0x6A: /* PUSH Ib, sign-extended to sz */
20295 if (haveF2orF3(pfx)) goto decode_failure;
20296       /* Note, a 32-bit push (sz==4) is not encodable in 64-bit mode.  Hence ... */
20297 if (sz == 4) sz = 8;
20298 d64 = getSDisp8(delta); delta += 1;
20299 goto do_push_I;
20300 do_push_I:
20301 ty = szToITy(sz);
20302 t1 = newTemp(Ity_I64);
20303 t2 = newTemp(ty);
20304 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
20305 putIReg64(R_RSP, mkexpr(t1) );
20306 /* stop mkU16 asserting if d32 is a negative 16-bit number
20307 (bug #132813) */
20308 if (ty == Ity_I16)
20309 d64 &= 0xFFFF;
20310 storeLE( mkexpr(t1), mkU(ty,d64) );
20311 DIP("push%c $%lld\n", nameISize(sz), (Long)d64);
20312 return delta;
20314 case 0x6B: /* IMUL Ib, Ev, Gv */
20315 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, 1 );
20316 return delta;
20318 case 0x70:
20319 case 0x71:
20320 case 0x72: /* JBb/JNAEb (jump below) */
20321 case 0x73: /* JNBb/JAEb (jump not below) */
20322 case 0x74: /* JZb/JEb (jump zero) */
20323 case 0x75: /* JNZb/JNEb (jump not zero) */
20324 case 0x76: /* JBEb/JNAb (jump below or equal) */
20325 case 0x77: /* JNBEb/JAb (jump not below or equal) */
20326 case 0x78: /* JSb (jump negative) */
20327    case 0x79: /* JNSb (jump not negative) */
20328 case 0x7A: /* JP (jump parity even) */
20329 case 0x7B: /* JNP/JPO (jump parity odd) */
20330 case 0x7C: /* JLb/JNGEb (jump less) */
20331 case 0x7D: /* JGEb/JNLb (jump greater or equal) */
20332 case 0x7E: /* JLEb/JNGb (jump less or equal) */
20333 case 0x7F: { /* JGb/JNLEb (jump greater) */
20334 Long jmpDelta;
20335 const HChar* comment = "";
20336 if (haveF3(pfx)) goto decode_failure;
20337 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
20338 jmpDelta = getSDisp8(delta);
20339 vassert(-128 <= jmpDelta && jmpDelta < 128);
20340 d64 = (guest_RIP_bbstart+delta+1) + jmpDelta;
20341 delta++;
20342 if (resteerCisOk
20343 && vex_control.guest_chase_cond
20344 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
20345 && jmpDelta < 0
20346 && resteerOkFn( callback_opaque, (Addr64)d64) ) {
20347 /* Speculation: assume this backward branch is taken. So we
20348 need to emit a side-exit to the insn following this one,
20349 on the negation of the condition, and continue at the
20350 branch target address (d64). If we wind up back at the
20351 first instruction of the trace, just stop; it's better to
20352 let the IR loop unroller handle that case. */
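            /* (Concretely, the statements below emit a side exit on the
               negated condition to the address of the next insn, and
               then carry on disassembling at d64, so the taken path
               becomes the fall-through of the superblock.) */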
20353 stmt( IRStmt_Exit(
20354 mk_amd64g_calculate_condition(
20355 (AMD64Condcode)(1 ^ (opc - 0x70))),
20356 Ijk_Boring,
20357 IRConst_U64(guest_RIP_bbstart+delta),
20358 OFFB_RIP ) );
20359 dres->whatNext = Dis_ResteerC;
20360 dres->continueAt = d64;
20361 comment = "(assumed taken)";
20363 else
20364 if (resteerCisOk
20365 && vex_control.guest_chase_cond
20366 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
20367 && jmpDelta >= 0
20368 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) {
20369 /* Speculation: assume this forward branch is not taken. So
20370 we need to emit a side-exit to d64 (the dest) and continue
20371 disassembling at the insn immediately following this
20372 one. */
20373 stmt( IRStmt_Exit(
20374 mk_amd64g_calculate_condition((AMD64Condcode)(opc - 0x70)),
20375 Ijk_Boring,
20376 IRConst_U64(d64),
20377 OFFB_RIP ) );
20378 dres->whatNext = Dis_ResteerC;
20379 dres->continueAt = guest_RIP_bbstart+delta;
20380 comment = "(assumed not taken)";
20382 else {
20383 /* Conservative default translation - end the block at this
20384 point. */
20385 jcc_01( dres, (AMD64Condcode)(opc - 0x70),
20386 guest_RIP_bbstart+delta, d64 );
20387 vassert(dres->whatNext == Dis_StopHere);
20389 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), (ULong)d64,
20390 comment);
20391 return delta;
20394 case 0x80: /* Grp1 Ib,Eb */
20395 modrm = getUChar(delta);
20396 /* Disallow F2/XACQ and F3/XREL for the non-mem case. Allow
20397 just one for the mem case and also require LOCK in this case.
20398 Note that this erroneously allows XACQ/XREL on CMP since we
20399 don't check the subopcode here. No big deal. */
20400 if (epartIsReg(modrm) && haveF2orF3(pfx))
20401 goto decode_failure;
20402 if (!epartIsReg(modrm) && haveF2andF3(pfx))
20403 goto decode_failure;
20404 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx))
20405 goto decode_failure;
20406 am_sz = lengthAMode(pfx,delta);
20407 sz = 1;
20408 d_sz = 1;
20409 d64 = getSDisp8(delta + am_sz);
20410 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
20411 return delta;
20413 case 0x81: /* Grp1 Iv,Ev */
20414 modrm = getUChar(delta);
20415 /* Same comment as for case 0x80 just above. */
20416 if (epartIsReg(modrm) && haveF2orF3(pfx))
20417 goto decode_failure;
20418 if (!epartIsReg(modrm) && haveF2andF3(pfx))
20419 goto decode_failure;
20420 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx))
20421 goto decode_failure;
20422 am_sz = lengthAMode(pfx,delta);
20423 d_sz = imin(sz,4);
20424 d64 = getSDisp(d_sz, delta + am_sz);
20425 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
20426 return delta;
20428 case 0x83: /* Grp1 Ib,Ev */
20429 if (haveF2orF3(pfx)) goto decode_failure;
20430 modrm = getUChar(delta);
20431 am_sz = lengthAMode(pfx,delta);
20432 d_sz = 1;
20433 d64 = getSDisp8(delta + am_sz);
20434 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
20435 return delta;
20437 case 0x84: /* TEST Eb,Gb */
20438 if (haveF2orF3(pfx)) goto decode_failure;
20439 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, False,
20440 1, delta, "test" );
20441 return delta;
20443 case 0x85: /* TEST Ev,Gv */
20444 if (haveF2orF3(pfx)) goto decode_failure;
20445 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, False,
20446 sz, delta, "test" );
20447 return delta;
20449 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
20450 prefix. Therefore, generate CAS regardless of the presence or
20451 otherwise of a LOCK prefix. */
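   /* (Hence the memory form below is expressed with casLE -- an IR
      compare-and-swap at the target address -- rather than a plain
      load/store pair, even when no LOCK prefix is present.) */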
20452 case 0x86: /* XCHG Gb,Eb */
20453 sz = 1;
20454 /* Fall through ... */
20455 case 0x87: /* XCHG Gv,Ev */
20456 modrm = getUChar(delta);
20457 /* Check whether F2 or F3 are allowable. For the mem case, one
20458          or the other but not both are.  We don't care about the
20459 presence of LOCK in this case -- XCHG is unusual in this
20460 respect. */
20461 if (haveF2orF3(pfx)) {
20462 if (epartIsReg(modrm)) {
20463 goto decode_failure;
20464 } else {
20465 if (haveF2andF3(pfx))
20466 goto decode_failure;
20469 ty = szToITy(sz);
20470 t1 = newTemp(ty); t2 = newTemp(ty);
20471 if (epartIsReg(modrm)) {
20472 assign(t1, getIRegE(sz, pfx, modrm));
20473 assign(t2, getIRegG(sz, pfx, modrm));
20474 putIRegG(sz, pfx, modrm, mkexpr(t1));
20475 putIRegE(sz, pfx, modrm, mkexpr(t2));
20476 delta++;
20477 DIP("xchg%c %s, %s\n",
20478 nameISize(sz), nameIRegG(sz, pfx, modrm),
20479 nameIRegE(sz, pfx, modrm));
20480 } else {
20481 *expect_CAS = True;
20482 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
20483 assign( t1, loadLE(ty, mkexpr(addr)) );
20484 assign( t2, getIRegG(sz, pfx, modrm) );
20485 casLE( mkexpr(addr),
20486 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
20487 putIRegG( sz, pfx, modrm, mkexpr(t1) );
20488 delta += alen;
20489 DIP("xchg%c %s, %s\n", nameISize(sz),
20490 nameIRegG(sz, pfx, modrm), dis_buf);
20492 return delta;
20494 case 0x88: { /* MOV Gb,Eb */
20495 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
20496 Bool ok = True;
20497 delta = dis_mov_G_E(vbi, pfx, 1, delta, &ok);
20498 if (!ok) goto decode_failure;
20499 return delta;
20502 case 0x89: { /* MOV Gv,Ev */
20503 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
20504 Bool ok = True;
20505 delta = dis_mov_G_E(vbi, pfx, sz, delta, &ok);
20506 if (!ok) goto decode_failure;
20507 return delta;
20510 case 0x8A: /* MOV Eb,Gb */
20511 if (haveF2orF3(pfx)) goto decode_failure;
20512 delta = dis_mov_E_G(vbi, pfx, 1, delta);
20513 return delta;
20515 case 0x8B: /* MOV Ev,Gv */
20516 if (haveF2orF3(pfx)) goto decode_failure;
20517 delta = dis_mov_E_G(vbi, pfx, sz, delta);
20518 return delta;
20520 case 0x8C: /* MOV S,E -- MOV from a SEGMENT REGISTER */
20521 if (haveF2orF3(pfx)) goto decode_failure;
20522 delta = dis_mov_S_E(vbi, pfx, sz, delta);
20523 return delta;
20525 case 0x8D: /* LEA M,Gv */
20526 if (haveF2orF3(pfx)) goto decode_failure;
20527 if (sz != 4 && sz != 8)
20528 goto decode_failure;
20529 modrm = getUChar(delta);
20530 if (epartIsReg(modrm))
20531 goto decode_failure;
20532 /* NOTE! this is the one place where a segment override prefix
20533 has no effect on the address calculation. Therefore we clear
20534 any segment override bits in pfx. */
20535 addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 );
20536 delta += alen;
20537       /* This is a hack.  But it isn't clear that doing the
20538 calculation at 32 bits is really worth it. Hence for leal,
20539 do the full 64-bit calculation and then truncate it. */
20540 putIRegG( sz, pfx, modrm,
20541 sz == 4
20542 ? unop(Iop_64to32, mkexpr(addr))
20543 : mkexpr(addr)
20545 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
20546 nameIRegG(sz,pfx,modrm));
20547 return delta;
20549 case 0x8F: { /* POPQ m64 / POPW m16 */
20550 Int len;
20551 UChar rm;
20552 /* There is no encoding for 32-bit pop in 64-bit mode.
20553 So sz==4 actually means sz==8. */
20554 if (haveF2orF3(pfx)) goto decode_failure;
20555 vassert(sz == 2 || sz == 4
20556 || /* tolerate redundant REX.W, see #210481 */ sz == 8);
20557 if (sz == 4) sz = 8;
20558 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20560 rm = getUChar(delta);
20562       /* make sure this instruction really is a valid POP (reg field 0, memory operand) */
20563 if (epartIsReg(rm) || gregLO3ofRM(rm) != 0)
20564 goto decode_failure;
20565 /* and has correct size */
20566 vassert(sz == 8);
20568 t1 = newTemp(Ity_I64);
20569 t3 = newTemp(Ity_I64);
20570 assign( t1, getIReg64(R_RSP) );
20571 assign( t3, loadLE(Ity_I64, mkexpr(t1)) );
20573 /* Increase RSP; must be done before the STORE. Intel manual
20574 says: If the RSP register is used as a base register for
20575 addressing a destination operand in memory, the POP
20576 instruction computes the effective address of the operand
20577 after it increments the RSP register. */
20578 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) );
20580 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
20581 storeLE( mkexpr(addr), mkexpr(t3) );
20583       DIP("popq %s\n", dis_buf);
20585 delta += len;
20586 return delta;
20589 case 0x90: /* XCHG eAX,eAX */
20590 /* detect and handle F3 90 (rep nop) specially */
20591 if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) {
20592 DIP("rep nop (P4 pause)\n");
20593 /* "observe" the hint. The Vex client needs to be careful not
20594 to cause very long delays as a result, though. */
20595 jmp_lit(dres, Ijk_Yield, guest_RIP_bbstart+delta);
20596 vassert(dres->whatNext == Dis_StopHere);
20597 return delta;
20599 /* detect and handle NOPs specially */
20600 if (/* F2/F3 probably change meaning completely */
20601 !haveF2orF3(pfx)
20602 /* If REX.B is 1, we're not exchanging rAX with itself */
20603 && getRexB(pfx)==0 ) {
20604 DIP("nop\n");
20605 return delta;
20607 /* else fall through to normal case. */
20608 case 0x91: /* XCHG rAX,rCX */
20609 case 0x92: /* XCHG rAX,rDX */
20610 case 0x93: /* XCHG rAX,rBX */
20611 case 0x94: /* XCHG rAX,rSP */
20612 case 0x95: /* XCHG rAX,rBP */
20613 case 0x96: /* XCHG rAX,rSI */
20614 case 0x97: /* XCHG rAX,rDI */
20615 /* guard against mutancy */
20616 if (haveF2orF3(pfx)) goto decode_failure;
20617 codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 );
20618 return delta;
20620 case 0x98: /* CBW */
20621 if (haveF2orF3(pfx)) goto decode_failure;
20622 if (sz == 8) {
20623 putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) );
20624          DIP(/*"cdqe\n"*/"cltq\n");
20625 return delta;
20627 if (sz == 4) {
20628 putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) );
20629 DIP("cwtl\n");
20630 return delta;
20632 if (sz == 2) {
20633 putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) );
20634 DIP("cbw\n");
20635 return delta;
20637 goto decode_failure;
20639 case 0x99: /* CWD/CDQ/CQO */
20640 if (haveF2orF3(pfx)) goto decode_failure;
20641 vassert(sz == 2 || sz == 4 || sz == 8);
20642 ty = szToITy(sz);
20643 putIRegRDX( sz,
20644 binop(mkSizedOp(ty,Iop_Sar8),
20645 getIRegRAX(sz),
20646 mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) );
20647 DIP(sz == 2 ? "cwd\n"
20648 : (sz == 4 ? /*"cdq\n"*/ "cltd\n"
20649 : "cqo\n"));
20650 return delta;
20652 case 0x9B: /* FWAIT (X87 insn) */
20653 /* ignore? */
20654 DIP("fwait\n");
20655 return delta;
20657 case 0x9C: /* PUSHF */ {
20658 /* Note. There is no encoding for a 32-bit pushf in 64-bit
20659 mode. So sz==4 actually means sz==8. */
20660 /* 24 July 06: has also been seen with a redundant REX prefix,
20661 so must also allow sz==8. */
20662 if (haveF2orF3(pfx)) goto decode_failure;
20663 vassert(sz == 2 || sz == 4 || sz == 8);
20664 if (sz == 4) sz = 8;
20665 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20667 t1 = newTemp(Ity_I64);
20668 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
20669 putIReg64(R_RSP, mkexpr(t1) );
20671 t2 = newTemp(Ity_I64);
20672 assign( t2, mk_amd64g_calculate_rflags_all() );
20674 /* Patch in the D flag. This can simply be a copy of bit 10 of
20675 baseBlock[OFFB_DFLAG]. */
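      /* (Descriptive note: the flags thunk only yields the O/S/Z/A/C/P
         bits, so the remaining architecturally visible bits -- D (bit
         10), ID (bit 21) and AC (bit 18) -- are each fetched from their
         own guest state field and OR'd into the value being pushed.) */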
20676 t3 = newTemp(Ity_I64);
20677 assign( t3, binop(Iop_Or64,
20678 mkexpr(t2),
20679 binop(Iop_And64,
20680 IRExpr_Get(OFFB_DFLAG,Ity_I64),
20681 mkU64(1<<10)))
20684 /* And patch in the ID flag. */
20685 t4 = newTemp(Ity_I64);
20686 assign( t4, binop(Iop_Or64,
20687 mkexpr(t3),
20688 binop(Iop_And64,
20689 binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64),
20690 mkU8(21)),
20691 mkU64(1<<21)))
20694 /* And patch in the AC flag too. */
20695 t5 = newTemp(Ity_I64);
20696 assign( t5, binop(Iop_Or64,
20697 mkexpr(t4),
20698 binop(Iop_And64,
20699 binop(Iop_Shl64, IRExpr_Get(OFFB_ACFLAG,Ity_I64),
20700 mkU8(18)),
20701 mkU64(1<<18)))
20704 /* if sz==2, the stored value needs to be narrowed. */
20705 if (sz == 2)
20706 storeLE( mkexpr(t1), unop(Iop_32to16,
20707 unop(Iop_64to32,mkexpr(t5))) );
20708 else
20709 storeLE( mkexpr(t1), mkexpr(t5) );
20711 DIP("pushf%c\n", nameISize(sz));
20712 return delta;
20715 case 0x9D: /* POPF */
20716 /* Note. There is no encoding for a 32-bit popf in 64-bit mode.
20717 So sz==4 actually means sz==8. */
20718 if (haveF2orF3(pfx)) goto decode_failure;
20719 vassert(sz == 2 || sz == 4);
20720 if (sz == 4) sz = 8;
20721 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20722 t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64);
20723 assign(t2, getIReg64(R_RSP));
20724 assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2))));
20725 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
20726 /* t1 is the flag word. Mask out everything except OSZACP and
20727 set the flags thunk to AMD64G_CC_OP_COPY. */
20728 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
20729 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
20730 stmt( IRStmt_Put( OFFB_CC_DEP1,
20731 binop(Iop_And64,
20732 mkexpr(t1),
20733 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P
20734 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z
20735 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O )
20740 /* Also need to set the D flag, which is held in bit 10 of t1.
20741 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
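      /* (Equivalently: bit 10 of the popped word selects between the two
         values VEX uses to represent DF -- all-ones (-1) for "decrement"
         and 1 for "increment" -- matching CLD/STD further down.) */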
20742 stmt( IRStmt_Put(
20743 OFFB_DFLAG,
20744 IRExpr_ITE(
20745 unop(Iop_64to1,
20746 binop(Iop_And64,
20747 binop(Iop_Shr64, mkexpr(t1), mkU8(10)),
20748 mkU64(1))),
20749 mkU64(0xFFFFFFFFFFFFFFFFULL),
20750 mkU64(1)))
20753 /* And set the ID flag */
20754 stmt( IRStmt_Put(
20755 OFFB_IDFLAG,
20756 IRExpr_ITE(
20757 unop(Iop_64to1,
20758 binop(Iop_And64,
20759 binop(Iop_Shr64, mkexpr(t1), mkU8(21)),
20760 mkU64(1))),
20761 mkU64(1),
20762 mkU64(0)))
20765 /* And set the AC flag too */
20766 stmt( IRStmt_Put(
20767 OFFB_ACFLAG,
20768 IRExpr_ITE(
20769 unop(Iop_64to1,
20770 binop(Iop_And64,
20771 binop(Iop_Shr64, mkexpr(t1), mkU8(18)),
20772 mkU64(1))),
20773 mkU64(1),
20774 mkU64(0)))
20777 DIP("popf%c\n", nameISize(sz));
20778 return delta;
20780 case 0x9E: /* SAHF */
20781 codegen_SAHF();
20782 DIP("sahf\n");
20783 return delta;
20785 case 0x9F: /* LAHF */
20786 codegen_LAHF();
20787 DIP("lahf\n");
20788 return delta;
20790 case 0xA0: /* MOV Ob,AL */
20791 if (have66orF2orF3(pfx)) goto decode_failure;
20792 sz = 1;
20793 /* Fall through ... */
20794 case 0xA1: /* MOV Ov,eAX */
20795 if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
20796 goto decode_failure;
20797 d64 = getDisp64(delta);
20798 delta += 8;
20799 ty = szToITy(sz);
20800 addr = newTemp(Ity_I64);
20801 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
20802 putIRegRAX(sz, loadLE( ty, mkexpr(addr) ));
20803 DIP("mov%c %s0x%llx, %s\n", nameISize(sz),
20804 segRegTxt(pfx), (ULong)d64,
20805 nameIRegRAX(sz));
20806 return delta;
20808 case 0xA2: /* MOV AL,Ob */
20809 if (have66orF2orF3(pfx)) goto decode_failure;
20810 sz = 1;
20811 /* Fall through ... */
20812 case 0xA3: /* MOV eAX,Ov */
20813 if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
20814 goto decode_failure;
20815 d64 = getDisp64(delta);
20816 delta += 8;
20817 ty = szToITy(sz);
20818 addr = newTemp(Ity_I64);
20819 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
20820 storeLE( mkexpr(addr), getIRegRAX(sz) );
20821 DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz),
20822 segRegTxt(pfx), (ULong)d64);
20823 return delta;
20825 case 0xA4:
20826 case 0xA5:
20827       /* F3 A4/A5: rep movsb / rep movs{w,l,q} */
20828 if (haveF3(pfx) && !haveF2(pfx)) {
20829 if (opc == 0xA4)
20830 sz = 1;
20831 dis_REP_op ( dres, AMD64CondAlways, dis_MOVS, sz,
20832 guest_RIP_curr_instr,
20833 guest_RIP_bbstart+delta, "rep movs", pfx );
20834 dres->whatNext = Dis_StopHere;
20835 return delta;
20837       /* A4/A5: movsb / movs{w,l,q} */
20838 if (!haveF3(pfx) && !haveF2(pfx)) {
20839 if (opc == 0xA4)
20840 sz = 1;
20841 dis_string_op( dis_MOVS, sz, "movs", pfx );
20842 return delta;
20844 goto decode_failure;
20846 case 0xA6:
20847 case 0xA7:
20848       /* F3 A6/A7: repe cmpsb / repe cmps{w,l,q} */
20849 if (haveF3(pfx) && !haveF2(pfx)) {
20850 if (opc == 0xA6)
20851 sz = 1;
20852 dis_REP_op ( dres, AMD64CondZ, dis_CMPS, sz,
20853 guest_RIP_curr_instr,
20854 guest_RIP_bbstart+delta, "repe cmps", pfx );
20855 dres->whatNext = Dis_StopHere;
20856 return delta;
20858 goto decode_failure;
20860 case 0xAA:
20861 case 0xAB:
20862 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */
20863 if (haveF3(pfx) && !haveF2(pfx)) {
20864 if (opc == 0xAA)
20865 sz = 1;
20866 dis_REP_op ( dres, AMD64CondAlways, dis_STOS, sz,
20867 guest_RIP_curr_instr,
20868 guest_RIP_bbstart+delta, "rep stos", pfx );
20869 vassert(dres->whatNext == Dis_StopHere);
20870 return delta;
20872 /* AA/AB: stosb/stos{w,l,q} */
20873 if (!haveF3(pfx) && !haveF2(pfx)) {
20874 if (opc == 0xAA)
20875 sz = 1;
20876 dis_string_op( dis_STOS, sz, "stos", pfx );
20877 return delta;
20879 goto decode_failure;
20881 case 0xA8: /* TEST Ib, AL */
20882 if (haveF2orF3(pfx)) goto decode_failure;
20883 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
20884 return delta;
20885 case 0xA9: /* TEST Iv, eAX */
20886 if (haveF2orF3(pfx)) goto decode_failure;
20887 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
20888 return delta;
20890 case 0xAC: /* LODS, no REP prefix */
20891 case 0xAD:
20892 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", pfx );
20893 return delta;
20895 case 0xAE:
20896 case 0xAF:
20897 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */
20898 if (haveF2(pfx) && !haveF3(pfx)) {
20899 if (opc == 0xAE)
20900 sz = 1;
20901 dis_REP_op ( dres, AMD64CondNZ, dis_SCAS, sz,
20902 guest_RIP_curr_instr,
20903 guest_RIP_bbstart+delta, "repne scas", pfx );
20904 vassert(dres->whatNext == Dis_StopHere);
20905 return delta;
20907 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */
20908 if (!haveF2(pfx) && haveF3(pfx)) {
20909 if (opc == 0xAE)
20910 sz = 1;
20911 dis_REP_op ( dres, AMD64CondZ, dis_SCAS, sz,
20912 guest_RIP_curr_instr,
20913 guest_RIP_bbstart+delta, "repe scas", pfx );
20914 vassert(dres->whatNext == Dis_StopHere);
20915 return delta;
20917 /* AE/AF: scasb/scas{w,l,q} */
20918 if (!haveF2(pfx) && !haveF3(pfx)) {
20919 if (opc == 0xAE)
20920 sz = 1;
20921 dis_string_op( dis_SCAS, sz, "scas", pfx );
20922 return delta;
20924 goto decode_failure;
20926 /* XXXX be careful here with moves to AH/BH/CH/DH */
20927 case 0xB0: /* MOV imm,AL */
20928 case 0xB1: /* MOV imm,CL */
20929 case 0xB2: /* MOV imm,DL */
20930 case 0xB3: /* MOV imm,BL */
20931 case 0xB4: /* MOV imm,AH */
20932 case 0xB5: /* MOV imm,CH */
20933 case 0xB6: /* MOV imm,DH */
20934 case 0xB7: /* MOV imm,BH */
20935 if (haveF2orF3(pfx)) goto decode_failure;
20936 d64 = getUChar(delta);
20937 delta += 1;
20938 putIRegRexB(1, pfx, opc-0xB0, mkU8(d64));
20939 DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0));
20940 return delta;
20942 case 0xB8: /* MOV imm,eAX */
20943 case 0xB9: /* MOV imm,eCX */
20944 case 0xBA: /* MOV imm,eDX */
20945 case 0xBB: /* MOV imm,eBX */
20946 case 0xBC: /* MOV imm,eSP */
20947 case 0xBD: /* MOV imm,eBP */
20948 case 0xBE: /* MOV imm,eSI */
20949 case 0xBF: /* MOV imm,eDI */
20950 /* This is the one-and-only place where 64-bit literals are
20951 allowed in the instruction stream. */
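      /* (Worked example, assuming a REX.W prefix so that sz==8: the byte
         sequence 48 B8 <8-byte imm> is decoded here as
         "movabsq $imm, %rax".) */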
20952 if (haveF2orF3(pfx)) goto decode_failure;
20953 if (sz == 8) {
20954 d64 = getDisp64(delta);
20955 delta += 8;
20956 putIRegRexB(8, pfx, opc-0xB8, mkU64(d64));
20957 DIP("movabsq $%lld,%s\n", (Long)d64,
20958 nameIRegRexB(8,pfx,opc-0xB8));
20959 } else {
20960 d64 = getSDisp(imin(4,sz),delta);
20961 delta += imin(4,sz);
20962 putIRegRexB(sz, pfx, opc-0xB8,
20963 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
20964 DIP("mov%c $%lld,%s\n", nameISize(sz),
20965 (Long)d64,
20966 nameIRegRexB(sz,pfx,opc-0xB8));
20968 return delta;
20970 case 0xC0: { /* Grp2 Ib,Eb */
20971 Bool decode_OK = True;
20972 if (haveF2orF3(pfx)) goto decode_failure;
20973 modrm = getUChar(delta);
20974 am_sz = lengthAMode(pfx,delta);
20975 d_sz = 1;
20976 d64 = getUChar(delta + am_sz);
20977 sz = 1;
20978 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20979 mkU8(d64 & 0xFF), NULL, &decode_OK );
20980 if (!decode_OK) goto decode_failure;
20981 return delta;
20984 case 0xC1: { /* Grp2 Ib,Ev */
20985 Bool decode_OK = True;
20986 if (haveF2orF3(pfx)) goto decode_failure;
20987 modrm = getUChar(delta);
20988 am_sz = lengthAMode(pfx,delta);
20989 d_sz = 1;
20990 d64 = getUChar(delta + am_sz);
20991 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20992 mkU8(d64 & 0xFF), NULL, &decode_OK );
20993 if (!decode_OK) goto decode_failure;
20994 return delta;
20997 case 0xC2: /* RET imm16 */
20998 if (have66orF3(pfx)) goto decode_failure;
20999 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21000 d64 = getUDisp16(delta);
21001 delta += 2;
21002 dis_ret(dres, vbi, d64);
21003 DIP("ret $%lld\n", d64);
21004 return delta;
21006 case 0xC3: /* RET */
21007 if (have66(pfx)) goto decode_failure;
21008 /* F3 is acceptable on AMD. */
21009 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21010 dis_ret(dres, vbi, 0);
21011 DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n");
21012 return delta;
21014 case 0xC6: /* C6 /0 = MOV Ib,Eb */
21015 sz = 1;
21016 goto maybe_do_Mov_I_E;
21017 case 0xC7: /* C7 /0 = MOV Iv,Ev */
21018 goto maybe_do_Mov_I_E;
21019 maybe_do_Mov_I_E:
21020 modrm = getUChar(delta);
21021 if (gregLO3ofRM(modrm) == 0) {
21022 if (epartIsReg(modrm)) {
21023 /* Neither F2 nor F3 are allowable. */
21024 if (haveF2orF3(pfx)) goto decode_failure;
21025 delta++; /* mod/rm byte */
21026 d64 = getSDisp(imin(4,sz),delta);
21027 delta += imin(4,sz);
21028 putIRegE(sz, pfx, modrm,
21029 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
21030 DIP("mov%c $%lld, %s\n", nameISize(sz),
21031 (Long)d64,
21032 nameIRegE(sz,pfx,modrm));
21033 } else {
21034 if (haveF2(pfx)) goto decode_failure;
21035 /* F3(XRELEASE) is allowable here */
21036 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
21037 /*xtra*/imin(4,sz) );
21038 delta += alen;
21039 d64 = getSDisp(imin(4,sz),delta);
21040 delta += imin(4,sz);
21041 storeLE(mkexpr(addr),
21042 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
21043 DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf);
21045 return delta;
21047 /* BEGIN HACKY SUPPORT FOR xbegin */
21048 if (opc == 0xC7 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 4
21049 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21050 delta++; /* mod/rm byte */
21051 d64 = getSDisp(4,delta);
21052 delta += 4;
21053 guest_RIP_next_mustcheck = True;
21054 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
21055 Addr64 failAddr = guest_RIP_bbstart + delta + d64;
21056 /* EAX contains the failure status code. Bit 3 is "Set if an
21057 internal buffer overflowed", which seems like the
21058 least-bogus choice we can make here. */
21059 putIRegRAX(4, mkU32(1<<3));
21060 /* And jump to the fail address. */
21061 jmp_lit(dres, Ijk_Boring, failAddr);
21062 vassert(dres->whatNext == Dis_StopHere);
21063 DIP("xbeginq 0x%llx\n", failAddr);
21064 return delta;
21066 /* END HACKY SUPPORT FOR xbegin */
21067 /* BEGIN HACKY SUPPORT FOR xabort */
21068 if (opc == 0xC6 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 1
21069 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21070 delta++; /* mod/rm byte */
21071 abyte = getUChar(delta); delta++;
21072 /* There is never a real transaction in progress, so do nothing. */
21073 DIP("xabort $%d", (Int)abyte);
21074 return delta;
21076 /* END HACKY SUPPORT FOR xabort */
21077 goto decode_failure;
21079 case 0xC8: /* ENTER */
21080 /* Same comments re operand size as for LEAVE below apply.
21081 Also, only handles the case "enter $imm16, $0"; other cases
21082 for the second operand (nesting depth) are not handled. */
21083 if (sz != 4)
21084 goto decode_failure;
21085 d64 = getUDisp16(delta);
21086 delta += 2;
21087 vassert(d64 >= 0 && d64 <= 0xFFFF);
21088 if (getUChar(delta) != 0)
21089 goto decode_failure;
21090 delta++;
21091 /* Intel docs seem to suggest:
21092 push rbp
21093 temp = rsp
21094 rbp = temp
21095 rsp = rsp - imm16
21097 t1 = newTemp(Ity_I64);
21098 assign(t1, getIReg64(R_RBP));
21099 t2 = newTemp(Ity_I64);
21100 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
21101 putIReg64(R_RSP, mkexpr(t2));
21102 storeLE(mkexpr(t2), mkexpr(t1));
21103 putIReg64(R_RBP, mkexpr(t2));
21104 if (d64 > 0) {
21105 putIReg64(R_RSP, binop(Iop_Sub64, mkexpr(t2), mkU64(d64)));
21107 DIP("enter $%u, $0\n", (UInt)d64);
21108 return delta;
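      /* (Sketch: the translation above makes "enter $imm16, $0" behave
         like the sequence  pushq %rbp ; movq %rsp, %rbp ; subq $imm16, %rsp.
         Nonzero nesting depths are rejected via decode_failure.) */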
21110 case 0xC9: /* LEAVE */
21111 /* In 64-bit mode this defaults to a 64-bit operand size. There
21112 is no way to encode a 32-bit variant. Hence sz==4 but we do
21113 it as if sz=8. */
21114 if (sz != 4)
21115 goto decode_failure;
21116 t1 = newTemp(Ity_I64);
21117 t2 = newTemp(Ity_I64);
21118 assign(t1, getIReg64(R_RBP));
21119 /* First PUT RSP looks redundant, but need it because RSP must
21120 always be up-to-date for Memcheck to work... */
21121 putIReg64(R_RSP, mkexpr(t1));
21122 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
21123 putIReg64(R_RBP, mkexpr(t2));
21124 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) );
21125 DIP("leave\n");
21126 return delta;
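      /* (Sketch: equivalently, "leave" is translated as
         movq %rbp, %rsp ; popq %rbp, with rsp written twice so that it
         is never stale from Memcheck's point of view.) */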
21128 case 0xCC: /* INT 3 */
21129 jmp_lit(dres, Ijk_SigTRAP, guest_RIP_bbstart + delta);
21130 vassert(dres->whatNext == Dis_StopHere);
21131 DIP("int $0x3\n");
21132 return delta;
21134 case 0xCD: /* INT imm8 */
21135 d64 = getUChar(delta); delta++;
21137 /* Handle int $0xD2 (Solaris fasttrap syscalls). */
21138 if (d64 == 0xD2) {
21139 jmp_lit(dres, Ijk_Sys_int210, guest_RIP_bbstart + delta);
21140 vassert(dres->whatNext == Dis_StopHere);
21141 DIP("int $0xD2\n");
21142 return delta;
21144 goto decode_failure;
21146 case 0xD0: { /* Grp2 1,Eb */
21147 Bool decode_OK = True;
21148 if (haveF2orF3(pfx)) goto decode_failure;
21149 modrm = getUChar(delta);
21150 am_sz = lengthAMode(pfx,delta);
21151 d_sz = 0;
21152 d64 = 1;
21153 sz = 1;
21154 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21155 mkU8(d64), NULL, &decode_OK );
21156 if (!decode_OK) goto decode_failure;
21157 return delta;
21160 case 0xD1: { /* Grp2 1,Ev */
21161 Bool decode_OK = True;
21162 if (haveF2orF3(pfx)) goto decode_failure;
21163 modrm = getUChar(delta);
21164 am_sz = lengthAMode(pfx,delta);
21165 d_sz = 0;
21166 d64 = 1;
21167 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21168 mkU8(d64), NULL, &decode_OK );
21169 if (!decode_OK) goto decode_failure;
21170 return delta;
21173 case 0xD2: { /* Grp2 CL,Eb */
21174 Bool decode_OK = True;
21175 if (haveF2orF3(pfx)) goto decode_failure;
21176 modrm = getUChar(delta);
21177 am_sz = lengthAMode(pfx,delta);
21178 d_sz = 0;
21179 sz = 1;
21180 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21181 getIRegCL(), "%cl", &decode_OK );
21182 if (!decode_OK) goto decode_failure;
21183 return delta;
21186 case 0xD3: { /* Grp2 CL,Ev */
21187 Bool decode_OK = True;
21188 if (haveF2orF3(pfx)) goto decode_failure;
21189 modrm = getUChar(delta);
21190 am_sz = lengthAMode(pfx,delta);
21191 d_sz = 0;
21192 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21193 getIRegCL(), "%cl", &decode_OK );
21194 if (!decode_OK) goto decode_failure;
21195 return delta;
21198 case 0xD8: /* X87 instructions */
21199 case 0xD9:
21200 case 0xDA:
21201 case 0xDB:
21202 case 0xDC:
21203 case 0xDD:
21204 case 0xDE:
21205 case 0xDF: {
21206 Bool redundantREXWok = False;
21208 if (haveF2orF3(pfx))
21209 goto decode_failure;
21211 /* kludge to tolerate redundant rex.w prefixes (should do this
21212 properly one day) */
21213 /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */
21214 if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ )
21215 redundantREXWok = True;
21217 Bool size_OK = False;
21218 if ( sz == 4 )
21219 size_OK = True;
21220 else if ( sz == 8 )
21221 size_OK = redundantREXWok;
21222 else if ( sz == 2 ) {
21223 int mod_rm = getUChar(delta+0);
21224 int reg = gregLO3ofRM(mod_rm);
21225 /* The HotSpot JVM uses these */
21226 if ( (opc == 0xDD) && (reg == 0 /* FLDL */ ||
21227 reg == 4 /* FNSAVE */ ||
21228 reg == 6 /* FRSTOR */ ) )
21229 size_OK = True;
21231 /* AMD manual says 0x66 size override is ignored, except where
21232 it is meaningful */
21233 if (!size_OK)
21234 goto decode_failure;
21236 Bool decode_OK = False;
21237 delta = dis_FPU ( &decode_OK, vbi, pfx, delta );
21238 if (!decode_OK)
21239 goto decode_failure;
21241 return delta;
21244 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
21245 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
21246 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
21247 { /* The docs say this uses rCX as a count depending on the
21248 address size override, not the operand one. */
21249 IRExpr* zbit = NULL;
21250 IRExpr* count = NULL;
21251 IRExpr* cond = NULL;
21252 const HChar* xtra = NULL;
21254 if (have66orF2orF3(pfx) || 1==getRexW(pfx)) goto decode_failure;
21255 /* So at this point we've rejected any variants which appear to
21256 be governed by the usual operand-size modifiers. Hence only
21257 the address size prefix can have an effect. It changes the
21258 size from 64 (default) to 32. */
21259 d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta);
21260 delta++;
21261 if (haveASO(pfx)) {
21262 /* 64to32 of 64-bit get is merely a get-put improvement
21263 trick. */
21264 putIReg32(R_RCX, binop(Iop_Sub32,
21265 unop(Iop_64to32, getIReg64(R_RCX)),
21266 mkU32(1)));
21267 } else {
21268 putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1)));
21271 /* This is correct, both for 32- and 64-bit versions. If we're
21272 doing a 32-bit dec and the result is zero then the default
21273 zero extension rule will cause the upper 32 bits to be zero
21274 too. Hence a 64-bit check against zero is OK. */
21275 count = getIReg64(R_RCX);
21276 cond = binop(Iop_CmpNE64, count, mkU64(0));
21277 switch (opc) {
21278 case 0xE2:
21279 xtra = "";
21280 break;
21281 case 0xE1:
21282 xtra = "e";
21283 zbit = mk_amd64g_calculate_condition( AMD64CondZ );
21284 cond = mkAnd1(cond, zbit);
21285 break;
21286 case 0xE0:
21287 xtra = "ne";
21288 zbit = mk_amd64g_calculate_condition( AMD64CondNZ );
21289 cond = mkAnd1(cond, zbit);
21290 break;
21291 default:
21292 vassert(0);
21294 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64), OFFB_RIP) );
21296 DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? "l" : "", (ULong)d64);
21297 return delta;
21300 case 0xE3:
21301       /* JRCXZ or JECXZ, depending on the address size override. */
21302 if (have66orF2orF3(pfx)) goto decode_failure;
21303 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
21304 delta++;
21305 if (haveASO(pfx)) {
21306 /* 32-bit */
21307 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
21308 unop(Iop_32Uto64, getIReg32(R_RCX)),
21309 mkU64(0)),
21310 Ijk_Boring,
21311 IRConst_U64(d64),
21312 OFFB_RIP
21314 DIP("jecxz 0x%llx\n", (ULong)d64);
21315 } else {
21316 /* 64-bit */
21317 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
21318 getIReg64(R_RCX),
21319 mkU64(0)),
21320 Ijk_Boring,
21321 IRConst_U64(d64),
21322 OFFB_RIP
21324 DIP("jrcxz 0x%llx\n", (ULong)d64);
21326 return delta;
21328 case 0xE4: /* IN imm8, AL */
21329 sz = 1;
21330 t1 = newTemp(Ity_I64);
21331 abyte = getUChar(delta); delta++;
21332 assign(t1, mkU64( abyte & 0xFF ));
21333 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
21334 goto do_IN;
21335 case 0xE5: /* IN imm8, eAX */
21336 if (!(sz == 2 || sz == 4)) goto decode_failure;
21337 t1 = newTemp(Ity_I64);
21338 abyte = getUChar(delta); delta++;
21339 assign(t1, mkU64( abyte & 0xFF ));
21340 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
21341 goto do_IN;
21342 case 0xEC: /* IN %DX, AL */
21343 sz = 1;
21344 t1 = newTemp(Ity_I64);
21345 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
21346 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
21347 nameIRegRAX(sz));
21348 goto do_IN;
21349 case 0xED: /* IN %DX, eAX */
21350 if (!(sz == 2 || sz == 4)) goto decode_failure;
21351 t1 = newTemp(Ity_I64);
21352 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
21353 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
21354 nameIRegRAX(sz));
21355 goto do_IN;
21356 do_IN: {
21357 /* At this point, sz indicates the width, and t1 is a 64-bit
21358          value giving the port number. */
21359 IRDirty* d;
21360 if (haveF2orF3(pfx)) goto decode_failure;
21361 vassert(sz == 1 || sz == 2 || sz == 4);
21362 ty = szToITy(sz);
21363 t2 = newTemp(Ity_I64);
21364 d = unsafeIRDirty_1_N(
21366 0/*regparms*/,
21367 "amd64g_dirtyhelper_IN",
21368 &amd64g_dirtyhelper_IN,
21369 mkIRExprVec_2( mkexpr(t1), mkU64(sz) )
21371 /* do the call, dumping the result in t2. */
21372 stmt( IRStmt_Dirty(d) );
21373 putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) );
21374 return delta;
21377 case 0xE6: /* OUT AL, imm8 */
21378 sz = 1;
21379 t1 = newTemp(Ity_I64);
21380 abyte = getUChar(delta); delta++;
21381 assign( t1, mkU64( abyte & 0xFF ) );
21382 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
21383 goto do_OUT;
21384 case 0xE7: /* OUT eAX, imm8 */
21385 if (!(sz == 2 || sz == 4)) goto decode_failure;
21386 t1 = newTemp(Ity_I64);
21387 abyte = getUChar(delta); delta++;
21388 assign( t1, mkU64( abyte & 0xFF ) );
21389 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
21390 goto do_OUT;
21391 case 0xEE: /* OUT AL, %DX */
21392 sz = 1;
21393 t1 = newTemp(Ity_I64);
21394 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
21395 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
21396 nameIRegRDX(2));
21397 goto do_OUT;
21398 case 0xEF: /* OUT eAX, %DX */
21399 if (!(sz == 2 || sz == 4)) goto decode_failure;
21400 t1 = newTemp(Ity_I64);
21401 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
21402 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
21403 nameIRegRDX(2));
21404 goto do_OUT;
21405 do_OUT: {
21406 /* At this point, sz indicates the width, and t1 is a 64-bit
21407          value giving the port number. */
21408 IRDirty* d;
21409 if (haveF2orF3(pfx)) goto decode_failure;
21410 vassert(sz == 1 || sz == 2 || sz == 4);
21411 ty = szToITy(sz);
21412 d = unsafeIRDirty_0_N(
21413 0/*regparms*/,
21414 "amd64g_dirtyhelper_OUT",
21415 &amd64g_dirtyhelper_OUT,
21416 mkIRExprVec_3( mkexpr(t1),
21417 widenUto64( getIRegRAX(sz) ),
21418 mkU64(sz) )
21420 stmt( IRStmt_Dirty(d) );
21421 return delta;
21424 case 0xE8: /* CALL J4 */
21425 if (haveF3(pfx)) goto decode_failure;
21426 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21427 d64 = getSDisp32(delta); delta += 4;
21428 d64 += (guest_RIP_bbstart+delta);
21429 /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */
21430 t1 = newTemp(Ity_I64);
21431 assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
21432 putIReg64(R_RSP, mkexpr(t1));
21433 storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta));
21434 t2 = newTemp(Ity_I64);
21435 assign(t2, mkU64((Addr64)d64));
21436 make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32");
21437 if (resteerOkFn( callback_opaque, (Addr64)d64) ) {
21438 /* follow into the call target. */
21439 dres->whatNext = Dis_ResteerU;
21440 dres->continueAt = d64;
21441 } else {
21442 jmp_lit(dres, Ijk_Call, d64);
21443 vassert(dres->whatNext == Dis_StopHere);
21445 DIP("call 0x%llx\n", (ULong)d64);
21446 return delta;
21448 case 0xE9: /* Jv (jump, 16/32 offset) */
21449 if (haveF3(pfx)) goto decode_failure;
21450 if (sz != 4)
21451 goto decode_failure; /* JRS added 2004 July 11 */
21452 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21453 d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta);
21454 delta += sz;
21455 if (resteerOkFn(callback_opaque, (Addr64)d64)) {
21456 dres->whatNext = Dis_ResteerU;
21457 dres->continueAt = d64;
21458 } else {
21459 jmp_lit(dres, Ijk_Boring, d64);
21460 vassert(dres->whatNext == Dis_StopHere);
21462 DIP("jmp 0x%llx\n", (ULong)d64);
21463 return delta;
21465 case 0xEB: /* Jb (jump, byte offset) */
21466 if (haveF3(pfx)) goto decode_failure;
21467 if (sz != 4)
21468 goto decode_failure; /* JRS added 2004 July 11 */
21469 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21470 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
21471 delta++;
21472 if (resteerOkFn(callback_opaque, (Addr64)d64)) {
21473 dres->whatNext = Dis_ResteerU;
21474 dres->continueAt = d64;
21475 } else {
21476 jmp_lit(dres, Ijk_Boring, d64);
21477 vassert(dres->whatNext == Dis_StopHere);
21479 DIP("jmp-8 0x%llx\n", (ULong)d64);
21480 return delta;
21482 case 0xF5: /* CMC */
21483 case 0xF8: /* CLC */
21484 case 0xF9: /* STC */
21485 t1 = newTemp(Ity_I64);
21486 t2 = newTemp(Ity_I64);
21487 assign( t1, mk_amd64g_calculate_rflags_all() );
21488 switch (opc) {
21489 case 0xF5:
21490 assign( t2, binop(Iop_Xor64, mkexpr(t1),
21491 mkU64(AMD64G_CC_MASK_C)));
21492 DIP("cmc\n");
21493 break;
21494 case 0xF8:
21495 assign( t2, binop(Iop_And64, mkexpr(t1),
21496 mkU64(~AMD64G_CC_MASK_C)));
21497 DIP("clc\n");
21498 break;
21499 case 0xF9:
21500 assign( t2, binop(Iop_Or64, mkexpr(t1),
21501 mkU64(AMD64G_CC_MASK_C)));
21502 DIP("stc\n");
21503 break;
21504 default:
21505 vpanic("disInstr(x64)(cmc/clc/stc)");
21507 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
21508 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
21509 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t2) ));
21510 /* Set NDEP even though it isn't used. This makes redundant-PUT
21511 elimination of previous stores to this field work better. */
21512 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
21513 return delta;
21515 case 0xF6: { /* Grp3 Eb */
21516 Bool decode_OK = True;
21517 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21518 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
21519 delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK );
21520 if (!decode_OK) goto decode_failure;
21521 return delta;
21524 case 0xF7: { /* Grp3 Ev */
21525 Bool decode_OK = True;
21526 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21527 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
21528 delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK );
21529 if (!decode_OK) goto decode_failure;
21530 return delta;
21533 case 0xFC: /* CLD */
21534 if (haveF2orF3(pfx)) goto decode_failure;
21535 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) );
21536 DIP("cld\n");
21537 return delta;
21539 case 0xFD: /* STD */
21540 if (haveF2orF3(pfx)) goto decode_failure;
21541 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) );
21542 DIP("std\n");
21543 return delta;
21545 case 0xFE: { /* Grp4 Eb */
21546 Bool decode_OK = True;
21547 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21548 /* We now let dis_Grp4 itself decide if F2 and/or F3 are valid */
21549 delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK );
21550 if (!decode_OK) goto decode_failure;
21551 return delta;
21554 case 0xFF: { /* Grp5 Ev */
21555 Bool decode_OK = True;
21556 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21557 /* We now let dis_Grp5 itself decide if F2 and/or F3 are valid */
21558 delta = dis_Grp5 ( vbi, pfx, sz, delta, dres, &decode_OK );
21559 if (!decode_OK) goto decode_failure;
21560 return delta;
21563 default:
21564 break;
21568 decode_failure:
21569 return deltaIN; /* fail */
21573 /*------------------------------------------------------------*/
21574 /*--- ---*/
21575 /*--- Top-level post-escape decoders: dis_ESC_0F ---*/
21576 /*--- ---*/
21577 /*------------------------------------------------------------*/
21579 static IRTemp math_BSWAP ( IRTemp t1, IRType ty )
21581 IRTemp t2 = newTemp(ty);
21582 if (ty == Ity_I64) {
21583 IRTemp m8 = newTemp(Ity_I64);
21584 IRTemp s8 = newTemp(Ity_I64);
21585 IRTemp m16 = newTemp(Ity_I64);
21586 IRTemp s16 = newTemp(Ity_I64);
21587 IRTemp m32 = newTemp(Ity_I64);
21588 assign( m8, mkU64(0xFF00FF00FF00FF00ULL) );
21589 assign( s8,
21590 binop(Iop_Or64,
21591 binop(Iop_Shr64,
21592 binop(Iop_And64,mkexpr(t1),mkexpr(m8)),
21593 mkU8(8)),
21594 binop(Iop_And64,
21595 binop(Iop_Shl64,mkexpr(t1),mkU8(8)),
21596 mkexpr(m8))
21600 assign( m16, mkU64(0xFFFF0000FFFF0000ULL) );
21601 assign( s16,
21602 binop(Iop_Or64,
21603 binop(Iop_Shr64,
21604 binop(Iop_And64,mkexpr(s8),mkexpr(m16)),
21605 mkU8(16)),
21606 binop(Iop_And64,
21607 binop(Iop_Shl64,mkexpr(s8),mkU8(16)),
21608 mkexpr(m16))
21612 assign( m32, mkU64(0xFFFFFFFF00000000ULL) );
21613 assign( t2,
21614 binop(Iop_Or64,
21615 binop(Iop_Shr64,
21616 binop(Iop_And64,mkexpr(s16),mkexpr(m32)),
21617 mkU8(32)),
21618 binop(Iop_And64,
21619 binop(Iop_Shl64,mkexpr(s16),mkU8(32)),
21620 mkexpr(m32))
21623 return t2;
21625 if (ty == Ity_I32) {
21626 assign( t2,
21627 binop(
21628 Iop_Or32,
21629 binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
21630 binop(
21631 Iop_Or32,
21632 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
21633 mkU32(0x00FF0000)),
21634 binop(Iop_Or32,
21635 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
21636 mkU32(0x0000FF00)),
21637 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
21638 mkU32(0x000000FF) )
21641 return t2;
21643 if (ty == Ity_I16) {
21644 assign(t2,
21645 binop(Iop_Or16,
21646 binop(Iop_Shl16, mkexpr(t1), mkU8(8)),
21647 binop(Iop_Shr16, mkexpr(t1), mkU8(8)) ));
21648 return t2;
21650 vassert(0);
21651 /*NOTREACHED*/
21652 return IRTemp_INVALID;
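   /* (Usage note: math_BSWAP returns a temp holding the byte-reversed
      value of t1, e.g. for Ity_I32 an input of 0x11223344 yields
      0x44332211; the Ity_I64 case performs the same reversal via the
      three mask-and-shift stages above.) */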
21656 __attribute__((noinline))
21657 static
21658 Long dis_ESC_0F (
21659 /*MB_OUT*/DisResult* dres,
21660 /*MB_OUT*/Bool* expect_CAS,
21661 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
21662 Bool resteerCisOk,
21663 void* callback_opaque,
21664 const VexArchInfo* archinfo,
21665 const VexAbiInfo* vbi,
21666 Prefix pfx, Int sz, Long deltaIN
21669 Long d64 = 0;
21670 IRTemp addr = IRTemp_INVALID;
21671 IRTemp t1 = IRTemp_INVALID;
21672 IRTemp t2 = IRTemp_INVALID;
21673 UChar modrm = 0;
21674 Int am_sz = 0;
21675 Int alen = 0;
21676 HChar dis_buf[50];
21678 /* In the first switch, look for ordinary integer insns. */
21679 Long delta = deltaIN;
21680 UChar opc = getUChar(delta);
21681 delta++;
21682 switch (opc) { /* first switch */
21684 case 0x01:
21686 modrm = getUChar(delta);
21687 /* 0F 01 /0 -- SGDT */
21688 /* 0F 01 /1 -- SIDT */
21689 if (!epartIsReg(modrm)
21690 && (gregLO3ofRM(modrm) == 0 || gregLO3ofRM(modrm) == 1)) {
21691 /* This is really revolting, but ... since each processor
21692 (core) only has one IDT and one GDT, just let the guest
21693 see it (pass-through semantics). I can't see any way to
21694 construct a faked-up value, so don't bother to try. */
21695 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21696 delta += alen;
21697 switch (gregLO3ofRM(modrm)) {
21698 case 0: DIP("sgdt %s\n", dis_buf); break;
21699 case 1: DIP("sidt %s\n", dis_buf); break;
21700 default: vassert(0); /*NOTREACHED*/
21702 IRDirty* d = unsafeIRDirty_0_N (
21703 0/*regparms*/,
21704 "amd64g_dirtyhelper_SxDT",
21705 &amd64g_dirtyhelper_SxDT,
21706 mkIRExprVec_2( mkexpr(addr),
21707 mkU64(gregLO3ofRM(modrm)) )
21709 /* declare we're writing memory */
21710 d->mFx = Ifx_Write;
21711 d->mAddr = mkexpr(addr);
21712 d->mSize = 6;
21713 stmt( IRStmt_Dirty(d) );
21714 return delta;
21716 /* 0F 01 D0 = XGETBV */
21717 if (modrm == 0xD0 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21718 delta += 1;
21719 DIP("xgetbv\n");
21720 /* Fault (SEGV) if ECX isn't zero. Intel docs say #GP and I
21721          am not sure if that translates into SEGV or to something
21722 else, in user space. */
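         /* (So the exit below delivers SIGSEGV to the guest when ECX is
            nonzero; otherwise the translation reports a fixed XCR0 value
            of 7 -- x87, SSE and AVX state enabled -- by putting 7 in EAX
            and 0 in EDX.) */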
21723 t1 = newTemp(Ity_I32);
21724 assign( t1, getIReg32(R_RCX) );
21725 stmt( IRStmt_Exit(binop(Iop_CmpNE32, mkexpr(t1), mkU32(0)),
21726 Ijk_SigSEGV,
21727 IRConst_U64(guest_RIP_curr_instr),
21728 OFFB_RIP
21730 putIRegRAX(4, mkU32(7));
21731 putIRegRDX(4, mkU32(0));
21732 return delta;
21734 /* BEGIN HACKY SUPPORT FOR xend */
21735 /* 0F 01 D5 = XEND */
21736 if (modrm == 0xD5 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21737          /* We are never in a transaction (xbegin immediately aborts).
21738 So this just always generates a General Protection Fault. */
21739 delta += 1;
21740 jmp_lit(dres, Ijk_SigSEGV, guest_RIP_bbstart + delta);
21741 vassert(dres->whatNext == Dis_StopHere);
21742 DIP("xend\n");
21743 return delta;
21745 /* END HACKY SUPPORT FOR xend */
21746 /* BEGIN HACKY SUPPORT FOR xtest */
21747 /* 0F 01 D6 = XTEST */
21748 if (modrm == 0xD6 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21749 /* Sets ZF because there never is a transaction, and all
21750 CF, OF, SF, PF and AF are always cleared by xtest. */
21751 delta += 1;
21752 DIP("xtest\n");
21753 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
21754 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
21755 stmt( IRStmt_Put( OFFB_CC_DEP1, mkU64(AMD64G_CC_MASK_Z) ));
21756 /* Set NDEP even though it isn't used. This makes redundant-PUT
21757 elimination of previous stores to this field work better. */
21758 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
21759 return delta;
21761 /* END HACKY SUPPORT FOR xtest */
21762 /* 0F 01 F9 = RDTSCP */
21763 if (modrm == 0xF9 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDTSCP)) {
21764 delta += 1;
21765 /* Uses dirty helper:
21766 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* )
21767 declared to wr rax, rcx, rdx
21769 const HChar* fName = "amd64g_dirtyhelper_RDTSCP";
21770 void* fAddr = &amd64g_dirtyhelper_RDTSCP;
21771 IRDirty* d
21772 = unsafeIRDirty_0_N ( 0/*regparms*/,
21773 fName, fAddr, mkIRExprVec_1(IRExpr_GSPTR()) );
21774 /* declare guest state effects */
21775 d->nFxState = 3;
21776 vex_bzero(&d->fxState, sizeof(d->fxState));
21777 d->fxState[0].fx = Ifx_Write;
21778 d->fxState[0].offset = OFFB_RAX;
21779 d->fxState[0].size = 8;
21780 d->fxState[1].fx = Ifx_Write;
21781 d->fxState[1].offset = OFFB_RCX;
21782 d->fxState[1].size = 8;
21783 d->fxState[2].fx = Ifx_Write;
21784 d->fxState[2].offset = OFFB_RDX;
21785 d->fxState[2].size = 8;
21786 /* execute the dirty call, side-effecting guest state */
21787 stmt( IRStmt_Dirty(d) );
21788 /* RDTSCP is a serialising insn. So, just in case someone is
21789 using it as a memory fence ... */
21790 stmt( IRStmt_MBE(Imbe_Fence) );
21791 DIP("rdtscp\n");
21792 return delta;
21794 /* else decode failed */
21795 break;
21798 case 0x05: /* SYSCALL */
21799 guest_RIP_next_mustcheck = True;
21800 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
21801 putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) );
21802 /* It's important that all guest state is up-to-date
21803 at this point. So we declare an end-of-block here, which
21804 forces any cached guest state to be flushed. */
21805 jmp_lit(dres, Ijk_Sys_syscall, guest_RIP_next_assumed);
21806 vassert(dres->whatNext == Dis_StopHere);
21807 DIP("syscall\n");
21808 return delta;
21810 case 0x0B: /* UD2 */
21811 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
21812 jmp_lit(dres, Ijk_NoDecode, guest_RIP_curr_instr);
21813 vassert(dres->whatNext == Dis_StopHere);
21814 DIP("ud2\n");
21815 return delta;
21817 case 0x0D: /* 0F 0D /0 -- prefetch mem8 */
21818 /* 0F 0D /1 -- prefetchw mem8 */
21819 if (have66orF2orF3(pfx)) goto decode_failure;
21820 modrm = getUChar(delta);
21821 if (epartIsReg(modrm)) goto decode_failure;
21822 if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1)
21823 goto decode_failure;
21824 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21825 delta += alen;
21826 switch (gregLO3ofRM(modrm)) {
21827 case 0: DIP("prefetch %s\n", dis_buf); break;
21828 case 1: DIP("prefetchw %s\n", dis_buf); break;
21829 default: vassert(0); /*NOTREACHED*/
21831 return delta;
21833 case 0x19:
21834 case 0x1C:
21835 case 0x1D:
21836 case 0x1E:
21837 case 0x1F:
21838 // Intel CET instructions can have any prefixes before NOPs
21839 // and can use any ModRM, SIB and disp
21840 modrm = getUChar(delta);
21841 if (epartIsReg(modrm)) {
21842 delta += 1;
21843 DIP("nop%c\n", nameISize(sz));
21844 } else {
21845 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21846 delta += alen;
21847 DIP("nop%c %s\n", nameISize(sz), dis_buf);
21849 return delta;
21851 case 0x31: { /* RDTSC */
21852 IRTemp val = newTemp(Ity_I64);
21853 IRExpr** args = mkIRExprVec_0();
21854 IRDirty* d = unsafeIRDirty_1_N (
21855 val,
21856 0/*regparms*/,
21857 "amd64g_dirtyhelper_RDTSC",
21858 &amd64g_dirtyhelper_RDTSC,
21859 args
21861 if (have66orF2orF3(pfx)) goto decode_failure;
21862 /* execute the dirty call, dumping the result in val. */
21863 stmt( IRStmt_Dirty(d) );
21864 putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val)));
21865 putIRegRAX(4, unop(Iop_64to32, mkexpr(val)));
21866 DIP("rdtsc\n");
21867 return delta;
21870 case 0x40:
21871 case 0x41:
21872 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
21873 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
21874 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
21875 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
21876 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
21877 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
21878 case 0x48: /* CMOVSb (cmov negative) */
21879    case 0x49: /* CMOVNSb (cmov not negative) */
21880 case 0x4A: /* CMOVP (cmov parity even) */
21881 case 0x4B: /* CMOVNP (cmov parity odd) */
21882 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
21883 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
21884 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
21885 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
21886 if (haveF2orF3(pfx)) goto decode_failure;
21887 delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta);
21888 return delta;
21890 case 0x80:
21891 case 0x81:
21892 case 0x82: /* JBb/JNAEb (jump below) */
21893 case 0x83: /* JNBb/JAEb (jump not below) */
21894 case 0x84: /* JZb/JEb (jump zero) */
21895 case 0x85: /* JNZb/JNEb (jump not zero) */
21896 case 0x86: /* JBEb/JNAb (jump below or equal) */
21897 case 0x87: /* JNBEb/JAb (jump not below or equal) */
21898 case 0x88: /* JSb (jump negative) */
21899    case 0x89: /* JNSb (jump not negative) */
21900 case 0x8A: /* JP (jump parity even) */
21901 case 0x8B: /* JNP/JPO (jump parity odd) */
21902 case 0x8C: /* JLb/JNGEb (jump less) */
21903 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
21904 case 0x8E: /* JLEb/JNGb (jump less or equal) */
21905 case 0x8F: { /* JGb/JNLEb (jump greater) */
21906 Long jmpDelta;
21907 const HChar* comment = "";
21908 if (haveF3(pfx)) goto decode_failure;
21909 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21910 jmpDelta = getSDisp32(delta);
21911 d64 = (guest_RIP_bbstart+delta+4) + jmpDelta;
21912 delta += 4;
21913 if (resteerCisOk
21914 && vex_control.guest_chase_cond
21915 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
21916 && jmpDelta < 0
21917 && resteerOkFn( callback_opaque, (Addr64)d64) ) {
21918 /* Speculation: assume this backward branch is taken. So
21919 we need to emit a side-exit to the insn following this
21920 one, on the negation of the condition, and continue at
21921 the branch target address (d64). If we wind up back at
21922 the first instruction of the trace, just stop; it's
21923 better to let the IR loop unroller handle that case. */
21924 stmt( IRStmt_Exit(
21925 mk_amd64g_calculate_condition(
21926 (AMD64Condcode)(1 ^ (opc - 0x80))),
21927 Ijk_Boring,
21928 IRConst_U64(guest_RIP_bbstart+delta),
21929 OFFB_RIP
21931 dres->whatNext = Dis_ResteerC;
21932 dres->continueAt = d64;
21933 comment = "(assumed taken)";
21935 else
21936 if (resteerCisOk
21937 && vex_control.guest_chase_cond
21938 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
21939 && jmpDelta >= 0
21940 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) {
21941 /* Speculation: assume this forward branch is not taken.
21942 So we need to emit a side-exit to d64 (the dest) and
21943 continue disassembling at the insn immediately
21944 following this one. */
21945 stmt( IRStmt_Exit(
21946 mk_amd64g_calculate_condition((AMD64Condcode)
21947 (opc - 0x80)),
21948 Ijk_Boring,
21949 IRConst_U64(d64),
21950 OFFB_RIP
21952 dres->whatNext = Dis_ResteerC;
21953 dres->continueAt = guest_RIP_bbstart+delta;
21954 comment = "(assumed not taken)";
21956 else {
21957 /* Conservative default translation - end the block at
21958 this point. */
21959 jcc_01( dres, (AMD64Condcode)(opc - 0x80),
21960 guest_RIP_bbstart+delta, d64 );
21961 vassert(dres->whatNext == Dis_StopHere);
21963 DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), (ULong)d64,
21964 comment);
21965 return delta;
21968 case 0x90:
21969 case 0x91:
21970 case 0x92: /* set-Bb/set-NAEb (set if below) */
21971 case 0x93: /* set-NBb/set-AEb (set if not below) */
21972 case 0x94: /* set-Zb/set-Eb (set if zero) */
21973 case 0x95: /* set-NZb/set-NEb (set if not zero) */
21974 case 0x96: /* set-BEb/set-NAb (set if below or equal) */
21975 case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
21976 case 0x98: /* set-Sb (set if negative) */
21977    case 0x99: /* set-NSb (set if not negative) */
21978 case 0x9A: /* set-P (set if parity even) */
21979 case 0x9B: /* set-NP (set if parity odd) */
21980 case 0x9C: /* set-Lb/set-NGEb (set if less) */
21981 case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
21982 case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
21983 case 0x9F: /* set-Gb/set-NLEb (set if greater) */
21984 if (haveF2orF3(pfx)) goto decode_failure;
21985 t1 = newTemp(Ity_I8);
21986 assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) );
21987 modrm = getUChar(delta);
21988 if (epartIsReg(modrm)) {
21989 delta++;
21990 putIRegE(1, pfx, modrm, mkexpr(t1));
21991 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90),
21992 nameIRegE(1,pfx,modrm));
21993 } else {
21994 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21995 delta += alen;
21996 storeLE( mkexpr(addr), mkexpr(t1) );
21997 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf);
21999 return delta;
22001 case 0x1A:
22002 case 0x1B: { /* Future MPX instructions, currently NOPs.
22003 BNDMK b, m F3 0F 1B
22004 BNDCL b, r/m F3 0F 1A
22005 BNDCU b, r/m F2 0F 1A
22006 BNDCN b, r/m F2 0F 1B
22007 BNDMOV b, b/m 66 0F 1A
22008 BNDMOV b/m, b 66 0F 1B
22009 BNDLDX b, mib 0F 1A
22010 BNDSTX mib, b 0F 1B */
22012 /* All instructions have two operands. One operand is always the
22013 bnd register number (bnd0-bnd3, other register numbers are
22014 ignored when MPX isn't enabled, but should generate an
22015 exception if MPX is enabled) given by gregOfRexRM. The other
22016 operand is either a ModRM:reg, ModRM:r/m or a SIB encoded
22017 address, all of which can be decoded by using either
22018 eregOfRexRM or disAMode. */
22020 modrm = getUChar(delta);
22021 int bnd = gregOfRexRM(pfx,modrm);
22022 const HChar *oper;
22023 if (epartIsReg(modrm)) {
22024 oper = nameIReg64 (eregOfRexRM(pfx,modrm));
22025 delta += 1;
22026 } else {
22027 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22028 delta += alen;
22029          oper = dis_buf;
22030       }
22032       if (haveF3no66noF2 (pfx)) {
22033          if (opc == 0x1B) {
22034             DIP ("bndmk %s, %%bnd%d\n", oper, bnd);
22035          } else /* opc == 0x1A */ {
22036             DIP ("bndcl %s, %%bnd%d\n", oper, bnd);
22037          }
22038       } else if (haveF2no66noF3 (pfx)) {
22039          if (opc == 0x1A) {
22040             DIP ("bndcu %s, %%bnd%d\n", oper, bnd);
22041          } else /* opc == 0x1B */ {
22042             DIP ("bndcn %s, %%bnd%d\n", oper, bnd);
22043          }
22044       } else if (have66noF2noF3 (pfx)) {
22045          if (opc == 0x1A) {
22046             DIP ("bndmov %s, %%bnd%d\n", oper, bnd);
22047          } else /* opc == 0x1B */ {
22048             DIP ("bndmov %%bnd%d, %s\n", bnd, oper);
22049          }
22050       } else if (haveNo66noF2noF3 (pfx)) {
22051          if (opc == 0x1A) {
22052             DIP ("bndldx %s, %%bnd%d\n", oper, bnd);
22053          } else /* opc == 0x1B */ {
22054             DIP ("bndstx %%bnd%d, %s\n", bnd, oper);
22055          }
22056       } else goto decode_failure;
22058       return delta;
22059    }
22061 case 0xA2: { /* CPUID */
22062 /* Uses dirty helper:
22063 void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* )
22064 declared to mod rax, wr rbx, rcx, rdx
22066 IRDirty* d = NULL;
22067 const HChar* fName = NULL;
22068 void* fAddr = NULL;
22070 if (haveF2orF3(pfx)) goto decode_failure;
22072       /* This isn't entirely correct: CPUID should depend on the VEX
22073          capabilities, not on the underlying CPU.  See bug #324882. */
22074 if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) &&
22075 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) &&
22076 (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX2)) {
22077 fName = "amd64g_dirtyhelper_CPUID_avx2";
22078 fAddr = &amd64g_dirtyhelper_CPUID_avx2;
22079          /* This is a Core-i7-4910-like machine */
22080       }
22081 else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) &&
22082 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) &&
22083 (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
22084 fName = "amd64g_dirtyhelper_CPUID_avx_and_cx16";
22085 fAddr = &amd64g_dirtyhelper_CPUID_avx_and_cx16;
22086          /* This is a Core-i5-2300-like machine */
22087       }
22088 else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) &&
22089 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16)) {
22090 fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16";
22091 fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16;
22092          /* This is a Core-i5-670-like machine */
22093       }
22094 else {
22095 /* Give a CPUID for at least a baseline machine, SSE2
22096 only, and no CX16 */
22097 fName = "amd64g_dirtyhelper_CPUID_baseline";
22098          fAddr = &amd64g_dirtyhelper_CPUID_baseline;
22099       }
22101 vassert(fName); vassert(fAddr);
22102 d = unsafeIRDirty_0_N ( 0/*regparms*/,
22103 fName, fAddr, mkIRExprVec_1(IRExpr_GSPTR()) );
22104 /* declare guest state effects */
22105 d->nFxState = 4;
22106 vex_bzero(&d->fxState, sizeof(d->fxState));
22107 d->fxState[0].fx = Ifx_Modify;
22108 d->fxState[0].offset = OFFB_RAX;
22109 d->fxState[0].size = 8;
22110 d->fxState[1].fx = Ifx_Write;
22111 d->fxState[1].offset = OFFB_RBX;
22112 d->fxState[1].size = 8;
22113 d->fxState[2].fx = Ifx_Modify;
22114 d->fxState[2].offset = OFFB_RCX;
22115 d->fxState[2].size = 8;
22116 d->fxState[3].fx = Ifx_Write;
22117 d->fxState[3].offset = OFFB_RDX;
22118 d->fxState[3].size = 8;
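      /* RAX and RCX are Ifx_Modify because CPUID reads them as inputs
         (the leaf number in EAX and, for some leaves, the subleaf in ECX)
         before overwriting them; RBX and RDX are pure outputs, hence
         Ifx_Write. */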
22119 /* execute the dirty call, side-effecting guest state */
22120 stmt( IRStmt_Dirty(d) );
22121 /* CPUID is a serialising insn. So, just in case someone is
22122 using it as a memory fence ... */
22123 stmt( IRStmt_MBE(Imbe_Fence) );
22124 DIP("cpuid\n");
22125 return delta;
22128 case 0xA3: { /* BT Gv,Ev */
22129 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22130 Bool ok = True;
22131 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22132 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone, &ok );
22133 if (!ok) goto decode_failure;
22134 return delta;
22137 case 0xA4: /* SHLDv imm8,Gv,Ev */
22138 modrm = getUChar(delta);
22139 d64 = delta + lengthAMode(pfx, delta);
22140 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
22141 delta = dis_SHLRD_Gv_Ev (
22142 vbi, pfx, delta, modrm, sz,
22143 mkU8(getUChar(d64)), True, /* literal */
22144 dis_buf, True /* left */ );
22145 return delta;
22147 case 0xA5: /* SHLDv %cl,Gv,Ev */
22148 modrm = getUChar(delta);
22149 delta = dis_SHLRD_Gv_Ev (
22150 vbi, pfx, delta, modrm, sz,
22151 getIRegCL(), False, /* not literal */
22152 "%cl", True /* left */ );
22153 return delta;
22155 case 0xAB: { /* BTS Gv,Ev */
22156 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22157 Bool ok = True;
22158 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22159 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet, &ok );
22160 if (!ok) goto decode_failure;
22161 return delta;
22164 case 0xAC: /* SHRDv imm8,Gv,Ev */
22165 modrm = getUChar(delta);
22166 d64 = delta + lengthAMode(pfx, delta);
22167 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
22168 delta = dis_SHLRD_Gv_Ev (
22169 vbi, pfx, delta, modrm, sz,
22170 mkU8(getUChar(d64)), True, /* literal */
22171 dis_buf, False /* right */ );
22172 return delta;
22174 case 0xAD: /* SHRDv %cl,Gv,Ev */
22175 modrm = getUChar(delta);
22176 delta = dis_SHLRD_Gv_Ev (
22177 vbi, pfx, delta, modrm, sz,
22178 getIRegCL(), False, /* not literal */
22179 "%cl", False /* right */);
22180 return delta;
22182 case 0xAF: /* IMUL Ev, Gv */
22183 if (haveF2orF3(pfx)) goto decode_failure;
22184 delta = dis_mul_E_G ( vbi, pfx, sz, delta );
22185 return delta;
22187 case 0xB0: { /* CMPXCHG Gb,Eb */
22188 Bool ok = True;
22189 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
22190 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta );
22191 if (!ok) goto decode_failure;
22192 return delta;
22195 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */
22196 Bool ok = True;
22197 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
22198 if (sz != 2 && sz != 4 && sz != 8) goto decode_failure;
22199 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta );
22200 if (!ok) goto decode_failure;
22201 return delta;
22204 case 0xB3: { /* BTR Gv,Ev */
22205 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22206 Bool ok = True;
22207 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22208 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset, &ok );
22209 if (!ok) goto decode_failure;
22210 return delta;
22213 case 0xB6: /* MOVZXb Eb,Gv */
22214 if (haveF2orF3(pfx)) goto decode_failure;
22215 if (sz != 2 && sz != 4 && sz != 8)
22216 goto decode_failure;
22217 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False );
22218 return delta;
22220 case 0xB7: /* MOVZXw Ew,Gv */
22221 if (haveF2orF3(pfx)) goto decode_failure;
22222 if (sz != 4 && sz != 8)
22223 goto decode_failure;
22224 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False );
22225 return delta;
22227 case 0xBA: { /* Grp8 Ib,Ev */
22228 /* We let dis_Grp8_Imm decide whether F2 or F3 are allowable. */
22229 Bool decode_OK = False;
22230 modrm = getUChar(delta);
22231 am_sz = lengthAMode(pfx,delta);
22232 d64 = getSDisp8(delta + am_sz);
22233 delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64,
22234 &decode_OK );
22235 if (!decode_OK)
22236 goto decode_failure;
22237 return delta;
22240 case 0xBB: { /* BTC Gv,Ev */
22241 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22242 Bool ok = False;
22243 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22244 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp, &ok );
22245 if (!ok) goto decode_failure;
22246 return delta;
22249 case 0xBC: /* BSF Gv,Ev */
22250 if (!haveF2orF3(pfx)
22251 || (haveF3noF2(pfx)
22252 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI))) {
22253 /* no-F2 no-F3 0F BC = BSF
22254 or F3 0F BC = REP; BSF on older CPUs. */
22255 delta = dis_bs_E_G ( vbi, pfx, sz, delta, True );
22256 return delta;
22258 /* Fall through, since F3 0F BC is TZCNT, and needs to
22259 be handled by dis_ESC_0F__SSE4. */
22260 break;
22262 case 0xBD: /* BSR Gv,Ev */
22263 if (!haveF2orF3(pfx)
22264 || (haveF3noF2(pfx)
22265 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT))) {
22266 /* no-F2 no-F3 0F BD = BSR
22267 or F3 0F BD = REP; BSR on older CPUs. */
22268 delta = dis_bs_E_G ( vbi, pfx, sz, delta, False );
22269 return delta;
22271 /* Fall through, since F3 0F BD is LZCNT, and needs to
22272 be handled by dis_ESC_0F__SSE4. */
22273 break;
22275 case 0xBE: /* MOVSXb Eb,Gv */
22276 if (haveF2orF3(pfx)) goto decode_failure;
22277 if (sz != 2 && sz != 4 && sz != 8)
22278 goto decode_failure;
22279 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True );
22280 return delta;
22282 case 0xBF: /* MOVSXw Ew,Gv */
22283 if (haveF2orF3(pfx)) goto decode_failure;
22284 if (sz != 4 && sz != 8)
22285 goto decode_failure;
22286 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True );
22287 return delta;
22289 case 0xC0: { /* XADD Gb,Eb */
22290 Bool decode_OK = False;
22291 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta );
22292 if (!decode_OK)
22293 goto decode_failure;
22294 return delta;
22297 case 0xC1: { /* XADD Gv,Ev */
22298 Bool decode_OK = False;
22299 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta );
22300 if (!decode_OK)
22301 goto decode_failure;
22302 return delta;
22305 case 0xC7: { /* CMPXCHG8B Ev, CMPXCHG16B Ev */
22306 IRType elemTy = sz==4 ? Ity_I32 : Ity_I64;
22307 IRTemp expdHi = newTemp(elemTy);
22308 IRTemp expdLo = newTemp(elemTy);
22309 IRTemp dataHi = newTemp(elemTy);
22310 IRTemp dataLo = newTemp(elemTy);
22311 IRTemp oldHi = newTemp(elemTy);
22312 IRTemp oldLo = newTemp(elemTy);
22313 IRTemp flags_old = newTemp(Ity_I64);
22314 IRTemp flags_new = newTemp(Ity_I64);
22315 IRTemp success = newTemp(Ity_I1);
22316 IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64;
22317 IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64;
22318 IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64;
22319 IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0);
22320 IRTemp expdHi64 = newTemp(Ity_I64);
22321 IRTemp expdLo64 = newTemp(Ity_I64);
22323 /* Translate this using a DCAS, even if there is no LOCK
22324 prefix. Life is too short to bother with generating two
22325 different translations for the with/without-LOCK-prefix
22326 cases. */
22327 *expect_CAS = True;
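      /* (The top-level decoder is expected to cross-check *expect_CAS
         against whether an IRStmt_CAS was actually emitted for this
         instruction; see the caller.) */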
22329 /* Decode, and generate address. */
22330 if (have66(pfx)) goto decode_failure;
22331 if (sz != 4 && sz != 8) goto decode_failure;
22332 if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16))
22333 goto decode_failure;
22334 modrm = getUChar(delta);
22335 if (epartIsReg(modrm)) goto decode_failure;
22336 if (gregLO3ofRM(modrm) != 1) goto decode_failure;
22337 if (haveF2orF3(pfx)) {
22338 /* Since the e-part is memory only, F2 or F3 (one or the
22339 other) is acceptable if LOCK is also present. But only
22340 for cmpxchg8b. */
22341 if (sz == 8) goto decode_failure;
22342          if (haveF2andF3(pfx) || !haveLOCK(pfx)) goto decode_failure;
22343       }
22345 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22346 delta += alen;
22348 /* cmpxchg16b requires an alignment check. */
22349 if (sz == 8)
22350 gen_SEGV_if_not_16_aligned( addr );
22352 /* Get the expected and new values. */
22353 assign( expdHi64, getIReg64(R_RDX) );
22354 assign( expdLo64, getIReg64(R_RAX) );
22356 /* These are the correctly-sized expected and new values.
22357 However, we also get expdHi64/expdLo64 above as 64-bits
22358 regardless, because we will need them later in the 32-bit
22359 case (paradoxically). */
22360 assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64))
22361 : mkexpr(expdHi64) );
22362 assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64))
22363 : mkexpr(expdLo64) );
22364 assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) );
22365 assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) );
22367 /* Do the DCAS */
22368 stmt( IRStmt_CAS(
22369 mkIRCAS( oldHi, oldLo,
22370 Iend_LE, mkexpr(addr),
22371 mkexpr(expdHi), mkexpr(expdLo),
22372 mkexpr(dataHi), mkexpr(dataLo)
22373 )));
22375 /* success when oldHi:oldLo == expdHi:expdLo */
22376 assign( success,
22377 binop(opCasCmpEQ,
22378 binop(opOR,
22379 binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)),
22380                           binop(opXOR, mkexpr(oldLo), mkexpr(expdLo))
22381                     ),
22382                     zero
22383                     )
22384             );
22385 /* If the DCAS is successful, that is to say oldHi:oldLo ==
22386 expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
22387 which is where they came from originally. Both the actual
22388 contents of these two regs, and any shadow values, are
22389 unchanged. If the DCAS fails then we're putting into
22390 RDX:RAX the value seen in memory. */
22391 /* Now of course there's a complication in the 32-bit case
22392 (bah!): if the DCAS succeeds, we need to leave RDX:RAX
22393 unchanged; but if we use the same scheme as in the 64-bit
22394 case, we get hit by the standard rule that a write to the
22395 bottom 32 bits of an integer register zeros the upper 32
22396 bits. And so the upper halves of RDX and RAX mysteriously
22397 become zero. So we have to stuff back in the original
22398 64-bit values which we previously stashed in
22399 expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
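      /* Concretely, for a successful 32-bit cmpxchg8b: writing back only
         EDX/EAX would implicitly zero RDX[63:32] and RAX[63:32], so we
         instead re-write the full 64-bit values stashed in
         expdHi64/expdLo64, leaving the upper halves untouched. */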
22400 /* It's just _so_ much fun ... */
22401 putIRegRDX( 8,
22402 IRExpr_ITE( mkexpr(success),
22403 mkexpr(expdHi64),
22404 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi))
22405                                       : mkexpr(oldHi)
22406                 ));
22407 putIRegRAX( 8,
22408 IRExpr_ITE( mkexpr(success),
22409 mkexpr(expdLo64),
22410 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldLo))
22411                                       : mkexpr(oldLo)
22412                 ));
22414 /* Copy the success bit into the Z flag and leave the others
22415 unchanged */
22416 assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all()));
22417 assign(
22418 flags_new,
22419 binop(Iop_Or64,
22420 binop(Iop_And64, mkexpr(flags_old),
22421 mkU64(~AMD64G_CC_MASK_Z)),
22422 binop(Iop_Shl64,
22423 binop(Iop_And64,
22424 unop(Iop_1Uto64, mkexpr(success)), mkU64(1)),
22425 mkU8(AMD64G_CC_SHIFT_Z)) ));
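      /* With CC_OP_COPY, DEP1 is interpreted as the literal rflags value,
         so the expression above simply clears the Z bit of the old flags
         and ORs in the DCAS success bit shifted into the Z position; all
         other flag bits are preserved. */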
22427 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
22428 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
22429 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
22430 /* Set NDEP even though it isn't used. This makes
22431 redundant-PUT elimination of previous stores to this field
22432 work better. */
22433 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
22435 /* Sheesh. Aren't you glad it was me and not you that had to
22436 write and validate all this grunge? */
22438 DIP("cmpxchg8b %s\n", dis_buf);
22439       return delta;
22440    }
22442 case 0xC8: /* BSWAP %eax */
22443 case 0xC9:
22444 case 0xCA:
22445 case 0xCB:
22446 case 0xCC:
22447 case 0xCD:
22448 case 0xCE:
22449 case 0xCF: /* BSWAP %edi */
22450 if (haveF2orF3(pfx)) goto decode_failure;
22451 /* According to the AMD64 docs, this insn can have size 4 or
22452 8. */
22453 if (sz == 4) {
22454 t1 = newTemp(Ity_I32);
22455 assign( t1, getIRegRexB(4, pfx, opc-0xC8) );
22456 t2 = math_BSWAP( t1, Ity_I32 );
22457 putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2));
22458 DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8));
22459          return delta;
22460       }
22461 if (sz == 8) {
22462 t1 = newTemp(Ity_I64);
22463 t2 = newTemp(Ity_I64);
22464 assign( t1, getIRegRexB(8, pfx, opc-0xC8) );
22465 t2 = math_BSWAP( t1, Ity_I64 );
22466 putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2));
22467 DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8));
22468          return delta;
22469       }
22470 goto decode_failure;
22472 default:
22473 break;
22475 } /* first switch */
22478 /* =-=-=-=-=-=-=-=-= MMXery =-=-=-=-=-=-=-=-= */
22479 /* In the second switch, pick off MMX insns. */
22481 if (!have66orF2orF3(pfx)) {
22482 /* So there's no SIMD prefix. */
22484 vassert(sz == 4 || sz == 8);
22486 switch (opc) { /* second switch */
22488 case 0x71:
22489 case 0x72:
22490 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
22492 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
22493 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
22494 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
22495 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
22497 case 0xFC:
22498 case 0xFD:
22499 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
22501 case 0xEC:
22502 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
22504 case 0xDC:
22505 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
22507 case 0xF8:
22508 case 0xF9:
22509 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
22511 case 0xE8:
22512 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
22514 case 0xD8:
22515 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
22517 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
22518 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
22520 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
22522 case 0x74:
22523 case 0x75:
22524 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
22526 case 0x64:
22527 case 0x65:
22528 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
22530 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
22531 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
22532 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
22534 case 0x68:
22535 case 0x69:
22536 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
22538 case 0x60:
22539 case 0x61:
22540 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
22542 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
22543 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
22544 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
22545 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
22547 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
22548 case 0xF2:
22549 case 0xF3:
22551 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
22552 case 0xD2:
22553 case 0xD3:
22555 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
22556 case 0xE2: {
22557 Bool decode_OK = False;
22558 delta = dis_MMX ( &decode_OK, vbi, pfx, sz, deltaIN );
22559 if (decode_OK)
22560 return delta;
22561 goto decode_failure;
22564 default:
22565 break;
22566 } /* second switch */
22570 /* A couple of MMX corner cases */
22571 if (opc == 0x0E/* FEMMS */ || opc == 0x77/* EMMS */) {
22572 if (sz != 4)
22573 goto decode_failure;
22574 do_EMMS_preamble();
22575 DIP("{f}emms\n");
22576 return delta;
22579 /* =-=-=-=-=-=-=-=-= SSE2ery =-=-=-=-=-=-=-=-= */
22580 /* Perhaps it's an SSE or SSE2 instruction. We can try this
22581 without checking the guest hwcaps because SSE2 is a baseline
22582 facility in 64 bit mode. */
22584 Bool decode_OK = False;
22585 delta = dis_ESC_0F__SSE2 ( &decode_OK,
22586 archinfo, vbi, pfx, sz, deltaIN, dres );
22587 if (decode_OK)
22588 return delta;
22591 /* =-=-=-=-=-=-=-=-= SSE3ery =-=-=-=-=-=-=-=-= */
22592 /* Perhaps it's a SSE3 instruction. FIXME: check guest hwcaps
22593 first. */
22595 Bool decode_OK = False;
22596 delta = dis_ESC_0F__SSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22597 if (decode_OK)
22598 return delta;
22601 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22602 /* Perhaps it's a SSE4 instruction. FIXME: check guest hwcaps
22603 first. */
22605 Bool decode_OK = False;
22606 delta = dis_ESC_0F__SSE4 ( &decode_OK,
22607 archinfo, vbi, pfx, sz, deltaIN );
22608 if (decode_OK)
22609 return delta;
22612 decode_failure:
22613 return deltaIN; /* fail */
22617 /*------------------------------------------------------------*/
22618 /*--- ---*/
22619 /*--- Top-level post-escape decoders: dis_ESC_0F38 ---*/
22620 /*--- ---*/
22621 /*------------------------------------------------------------*/
22623 __attribute__((noinline))
22624 static
22625 Long dis_ESC_0F38 (
22626 /*MB_OUT*/DisResult* dres,
22627 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
22628 Bool resteerCisOk,
22629 void* callback_opaque,
22630 const VexArchInfo* archinfo,
22631 const VexAbiInfo* vbi,
22632 Prefix pfx, Int sz, Long deltaIN
22635 Long delta = deltaIN;
22636 UChar opc = getUChar(delta);
22637 delta++;
22638 switch (opc) {
22640 case 0xF0: /* 0F 38 F0 = MOVBE m16/32/64(E), r16/32/64(G) */
22641 case 0xF1: { /* 0F 38 F1 = MOVBE r16/32/64(G), m16/32/64(E) */
22642 if (!haveF2orF3(pfx) && !haveVEX(pfx)
22643 && (sz == 2 || sz == 4 || sz == 8)) {
22644 IRTemp addr = IRTemp_INVALID;
22645 UChar modrm = 0;
22646 Int alen = 0;
22647 HChar dis_buf[50];
22648 modrm = getUChar(delta);
22649 if (epartIsReg(modrm)) break;
22650 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22651 delta += alen;
22652 IRType ty = szToITy(sz);
22653 IRTemp src = newTemp(ty);
22654 if (opc == 0xF0) { /* LOAD */
22655 assign(src, loadLE(ty, mkexpr(addr)));
22656 IRTemp dst = math_BSWAP(src, ty);
22657 putIRegG(sz, pfx, modrm, mkexpr(dst));
22658 DIP("movbe %s,%s\n", dis_buf, nameIRegG(sz, pfx, modrm));
22659 } else { /* STORE */
22660 assign(src, getIRegG(sz, pfx, modrm));
22661 IRTemp dst = math_BSWAP(src, ty);
22662 storeLE(mkexpr(addr), mkexpr(dst));
22663             DIP("movbe %s,%s\n", nameIRegG(sz, pfx, modrm), dis_buf);
22664          }
22665          return delta;
22666       }
22667 /* else fall through; maybe one of the decoders below knows what
22668 it is. */
22669 break;
22672 default:
22673 break;
22676 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22677 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22678 rather than proceeding indiscriminately. */
22680 Bool decode_OK = False;
22681 delta = dis_ESC_0F38__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22682 if (decode_OK)
22683 return delta;
22686 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22687 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22688 rather than proceeding indiscriminately. */
22690 Bool decode_OK = False;
22691 delta = dis_ESC_0F38__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
22692 if (decode_OK)
22693 return delta;
22696 /* Ignore previous decode attempts and restart from the beginning of
22697 the instruction. */
22698 delta = deltaIN;
22699 opc = getUChar(delta);
22700 delta++;
22702 switch (opc) {
22704 case 0xF6: {
22705 /* 66 0F 38 F6 = ADCX r32/64(G), m32/64(E) */
22706 /* F3 0F 38 F6 = ADOX r32/64(G), m32/64(E) */
22707 /* These were introduced in Broadwell. Gate them on AVX so as to at
22708 least reject them on earlier guests. Has no host requirements. */
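      /* ADCX is an add-with-carry that reads and writes only CF, and ADOX
         an add-with-carry that reads and writes only OF; all other flags
         are left untouched.  Hence the WithFlagCarryX / WithFlagOverX
         variants of dis_op2_E_G below rather than the plain carry-in
         form. */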
22709 if (have66noF2noF3(pfx) && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
22710 if (sz == 2) {
22711             sz = 4; /* 66 prefix but operand size is 4/8 */
22712          }
22713 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagCarryX, True,
22714 sz, delta, "adcx" );
22715          return delta;
22716       }
22717 if (haveF3no66noF2(pfx) && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
22718 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagOverX, True,
22719 sz, delta, "adox" );
22720          return delta;
22721       }
22722 /* else fall through */
22723 break;
22726 default:
22727 break;
22730 /*decode_failure:*/
22731 return deltaIN; /* fail */
22735 /*------------------------------------------------------------*/
22736 /*--- ---*/
22737 /*--- Top-level post-escape decoders: dis_ESC_0F3A ---*/
22738 /*--- ---*/
22739 /*------------------------------------------------------------*/
22741 __attribute__((noinline))
22742 static
22743 Long dis_ESC_0F3A (
22744 /*MB_OUT*/DisResult* dres,
22745 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
22746 Bool resteerCisOk,
22747 void* callback_opaque,
22748 const VexArchInfo* archinfo,
22749 const VexAbiInfo* vbi,
22750 Prefix pfx, Int sz, Long deltaIN
22753 Long delta = deltaIN;
22754 UChar opc = getUChar(delta);
22755 delta++;
22756 switch (opc) {
22758 default:
22759 break;
22763 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22764 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22765 rather than proceeding indiscriminately. */
22767 Bool decode_OK = False;
22768 delta = dis_ESC_0F3A__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22769 if (decode_OK)
22770 return delta;
22773 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22774 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22775 rather than proceeding indiscriminately. */
22777 Bool decode_OK = False;
22778 delta = dis_ESC_0F3A__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
22779 if (decode_OK)
22780 return delta;
22783 return deltaIN; /* fail */
22787 /*------------------------------------------------------------*/
22788 /*--- ---*/
22789 /*--- Top-level post-escape decoders: dis_ESC_0F__VEX ---*/
22790 /*--- ---*/
22791 /*------------------------------------------------------------*/
22793 /* FIXME: common up with the _256_ version below? */
22794 static
22795 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG (
22796 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
22797 Prefix pfx, Long delta, const HChar* name,
22798 /* The actual operation. Use either 'op' or 'opfn',
22799 but not both. */
22800 IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
22801 Bool invertLeftArg,
22802 Bool swapArgs
22805 UChar modrm = getUChar(delta);
22806 UInt rD = gregOfRexRM(pfx, modrm);
22807 UInt rSL = getVexNvvvv(pfx);
22808 IRTemp tSL = newTemp(Ity_V128);
22809 IRTemp tSR = newTemp(Ity_V128);
22810 IRTemp addr = IRTemp_INVALID;
22811 HChar dis_buf[50];
22812 Int alen = 0;
22813 vassert(0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*WIG?*/);
22815 assign(tSL, invertLeftArg ? unop(Iop_NotV128, getXMMReg(rSL))
22816 : getXMMReg(rSL));
22818 if (epartIsReg(modrm)) {
22819 UInt rSR = eregOfRexRM(pfx, modrm);
22820 delta += 1;
22821 assign(tSR, getXMMReg(rSR));
22822 DIP("%s %s,%s,%s\n",
22823 name, nameXMMReg(rSR), nameXMMReg(rSL), nameXMMReg(rD));
22824 } else {
22825 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
22826 delta += alen;
22827 assign(tSR, loadLE(Ity_V128, mkexpr(addr)));
22828 DIP("%s %s,%s,%s\n",
22829 name, dis_buf, nameXMMReg(rSL), nameXMMReg(rD));
22832 IRTemp res = IRTemp_INVALID;
22833 if (op != Iop_INVALID) {
22834 vassert(opFn == NULL);
22835 res = newTemp(Ity_V128);
22836 if (requiresRMode(op)) {
22837 IRTemp rm = newTemp(Ity_I32);
22838 assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
22839 assign(res, swapArgs
22840 ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
22841 : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
22842 } else {
22843 assign(res, swapArgs
22844 ? binop(op, mkexpr(tSR), mkexpr(tSL))
22845 : binop(op, mkexpr(tSL), mkexpr(tSR)));
22847 } else {
22848 vassert(opFn != NULL);
22849 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
22852 putYMMRegLoAndZU(rD, mkexpr(res));
22854 *uses_vvvv = True;
22855 return delta;
22859 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, with a simple IROp
22860 for the operation, no inversion of the left arg, and no swapping of
22861 args. */
22862 static
22863 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple (
22864 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
22865 Prefix pfx, Long delta, const HChar* name,
22866 IROp op
22869 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22870 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
22874 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, using the given IR
22875 generator to compute the result, no inversion of the left
22876 arg, and no swapping of args. */
22877 static
22878 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex (
22879 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
22880 Prefix pfx, Long delta, const HChar* name,
22881 IRTemp(*opFn)(IRTemp,IRTemp)
22884 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22885 uses_vvvv, vbi, pfx, delta, name,
22886 Iop_INVALID, opFn, False, False );
22890 /* Vector by scalar shift of V by the amount specified at the bottom
22891 of E. */
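/* For these shifts the x86 semantics are that an amount greater than or
   equal to the lane width does not wrap: logical shifts yield zero and
   arithmetic right shifts yield the sign-filled value.  Hence the
   CmpLT64U guard below, with 0 or a shift by (size-1) substituted for
   out-of-range amounts. */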
22892 static ULong dis_AVX128_shiftV_byE ( const VexAbiInfo* vbi,
22893 Prefix pfx, Long delta,
22894 const HChar* opname, IROp op )
22896 HChar dis_buf[50];
22897 Int alen, size;
22898 IRTemp addr;
22899 Bool shl, shr, sar;
22900 UChar modrm = getUChar(delta);
22901 UInt rG = gregOfRexRM(pfx,modrm);
22902    UInt   rV    = getVexNvvvv(pfx);
22903 IRTemp g0 = newTemp(Ity_V128);
22904 IRTemp g1 = newTemp(Ity_V128);
22905 IRTemp amt = newTemp(Ity_I64);
22906 IRTemp amt8 = newTemp(Ity_I8);
22907 if (epartIsReg(modrm)) {
22908 UInt rE = eregOfRexRM(pfx,modrm);
22909 assign( amt, getXMMRegLane64(rE, 0) );
22910 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
22911 nameXMMReg(rV), nameXMMReg(rG) );
22912 delta++;
22913 } else {
22914 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22915 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
22916 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
22917 delta += alen;
22919 assign( g0, getXMMReg(rV) );
22920 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
22922 shl = shr = sar = False;
22923 size = 0;
22924 switch (op) {
22925 case Iop_ShlN16x8: shl = True; size = 32; break;
22926 case Iop_ShlN32x4: shl = True; size = 32; break;
22927 case Iop_ShlN64x2: shl = True; size = 64; break;
22928 case Iop_SarN16x8: sar = True; size = 16; break;
22929 case Iop_SarN32x4: sar = True; size = 32; break;
22930 case Iop_ShrN16x8: shr = True; size = 16; break;
22931 case Iop_ShrN32x4: shr = True; size = 32; break;
22932 case Iop_ShrN64x2: shr = True; size = 64; break;
22933 default: vassert(0);
22936 if (shl || shr) {
22937      assign(
22938         g1,
22939         IRExpr_ITE(
22940            binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
22941            binop(op, mkexpr(g0), mkexpr(amt8)),
22942            mkV128(0x0000)
22943         )
22944      );
22945 } else
22946 if (sar) {
22947      assign(
22948         g1,
22949         IRExpr_ITE(
22950            binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
22951            binop(op, mkexpr(g0), mkexpr(amt8)),
22952            binop(op, mkexpr(g0), mkU8(size-1))
22953         )
22954      );
22955 } else {
22956 vassert(0);
22959 putYMMRegLoAndZU( rG, mkexpr(g1) );
22960 return delta;
22964 /* Vector by scalar shift of V by the amount specified at the bottom
22965 of E. */
22966 static ULong dis_AVX256_shiftV_byE ( const VexAbiInfo* vbi,
22967 Prefix pfx, Long delta,
22968 const HChar* opname, IROp op )
22970 HChar dis_buf[50];
22971 Int alen, size;
22972 IRTemp addr;
22973 Bool shl, shr, sar;
22974 UChar modrm = getUChar(delta);
22975 UInt rG = gregOfRexRM(pfx,modrm);
22976    UInt   rV    = getVexNvvvv(pfx);
22977 IRTemp g0 = newTemp(Ity_V256);
22978 IRTemp g1 = newTemp(Ity_V256);
22979 IRTemp amt = newTemp(Ity_I64);
22980 IRTemp amt8 = newTemp(Ity_I8);
22981 if (epartIsReg(modrm)) {
22982 UInt rE = eregOfRexRM(pfx,modrm);
22983 assign( amt, getXMMRegLane64(rE, 0) );
22984 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
22985 nameYMMReg(rV), nameYMMReg(rG) );
22986 delta++;
22987 } else {
22988 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22989 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
22990 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
22991 delta += alen;
22993 assign( g0, getYMMReg(rV) );
22994 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
22996 shl = shr = sar = False;
22997 size = 0;
22998 switch (op) {
22999 case Iop_ShlN16x16: shl = True; size = 32; break;
23000 case Iop_ShlN32x8: shl = True; size = 32; break;
23001 case Iop_ShlN64x4: shl = True; size = 64; break;
23002 case Iop_SarN16x16: sar = True; size = 16; break;
23003 case Iop_SarN32x8: sar = True; size = 32; break;
23004 case Iop_ShrN16x16: shr = True; size = 16; break;
23005 case Iop_ShrN32x8: shr = True; size = 32; break;
23006 case Iop_ShrN64x4: shr = True; size = 64; break;
23007 default: vassert(0);
23010 if (shl || shr) {
23011      assign(
23012         g1,
23013         IRExpr_ITE(
23014            binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
23015            binop(op, mkexpr(g0), mkexpr(amt8)),
23016            binop(Iop_V128HLtoV256, mkV128(0), mkV128(0))
23017         )
23018      );
23019 } else
23020 if (sar) {
23021      assign(
23022         g1,
23023         IRExpr_ITE(
23024            binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
23025            binop(op, mkexpr(g0), mkexpr(amt8)),
23026            binop(op, mkexpr(g0), mkU8(size-1))
23027         )
23028      );
23029 } else {
23030 vassert(0);
23033 putYMMReg( rG, mkexpr(g1) );
23034 return delta;
23038 /* Vector by vector shift of V by the amount specified at the bottom
23039 of E. Vector by vector shifts are defined for all shift amounts,
23040 so not using Iop_S*x* here (and SSE2 doesn't support variable shifts
23041 anyway). */
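/* For the AVX2 VPSLLV/VPSRLV/VPSRAV family each lane is shifted by its
   own count: counts >= the lane width give zero for logical shifts and
   the sign-filled value for arithmetic shifts, which the per-lane ITE
   below reproduces. */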
23042 static ULong dis_AVX_var_shiftV_byE ( const VexAbiInfo* vbi,
23043 Prefix pfx, Long delta,
23044 const HChar* opname, IROp op, Bool isYMM )
23046 HChar dis_buf[50];
23047 Int alen, size, i;
23048 IRTemp addr;
23049 UChar modrm = getUChar(delta);
23050 UInt rG = gregOfRexRM(pfx,modrm);
23051    UInt   rG    = gregOfRexRM(pfx,modrm);
23051    UInt   rV    = getVexNvvvv(pfx);
23052 IRTemp sV = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128);
23053 IRTemp amt = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128);
23054 IRTemp amts[8], sVs[8], res[8];
23055 if (epartIsReg(modrm)) {
23056 UInt rE = eregOfRexRM(pfx,modrm);
23057 assign( amt, isYMM ? getYMMReg(rE) : getXMMReg(rE) );
23058 if (isYMM) {
23059 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rE),
23060 nameYMMReg(rV), nameYMMReg(rG) );
23061 } else {
23062 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
23063 nameXMMReg(rV), nameXMMReg(rG) );
23065 delta++;
23066 } else {
23067 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23068 assign( amt, loadLE(isYMM ? Ity_V256 : Ity_V128, mkexpr(addr)) );
23069 if (isYMM) {
23070 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV),
23071 nameYMMReg(rG) );
23072 } else {
23073 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV),
23074 nameXMMReg(rG) );
23076 delta += alen;
23078 assign( sV, isYMM ? getYMMReg(rV) : getXMMReg(rV) );
23080 size = 0;
23081 switch (op) {
23082 case Iop_Shl32: size = 32; break;
23083 case Iop_Shl64: size = 64; break;
23084 case Iop_Sar32: size = 32; break;
23085 case Iop_Shr32: size = 32; break;
23086 case Iop_Shr64: size = 64; break;
23087 default: vassert(0);
23090 for (i = 0; i < 8; i++) {
23091 sVs[i] = IRTemp_INVALID;
23092 amts[i] = IRTemp_INVALID;
23094 switch (size) {
23095 case 32:
23096 if (isYMM) {
23097 breakupV256to32s( sV, &sVs[7], &sVs[6], &sVs[5], &sVs[4],
23098 &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
23099 breakupV256to32s( amt, &amts[7], &amts[6], &amts[5], &amts[4],
23100 &amts[3], &amts[2], &amts[1], &amts[0] );
23101 } else {
23102 breakupV128to32s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
23103 breakupV128to32s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
23105 break;
23106 case 64:
23107 if (isYMM) {
23108 breakupV256to64s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
23109 breakupV256to64s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
23110 } else {
23111 breakupV128to64s( sV, &sVs[1], &sVs[0] );
23112 breakupV128to64s( amt, &amts[1], &amts[0] );
23114 break;
23115 default: vassert(0);
23117 for (i = 0; i < 8; i++)
23118 if (sVs[i] != IRTemp_INVALID) {
23119 res[i] = size == 32 ? newTemp(Ity_I32) : newTemp(Ity_I64);
23120 assign( res[i],
23121 IRExpr_ITE(
23122 binop(size == 32 ? Iop_CmpLT32U : Iop_CmpLT64U,
23123 mkexpr(amts[i]),
23124 size == 32 ? mkU32(size) : mkU64(size)),
23125 binop(op, mkexpr(sVs[i]),
23126 unop(size == 32 ? Iop_32to8 : Iop_64to8,
23127 mkexpr(amts[i]))),
23128 op == Iop_Sar32 ? binop(op, mkexpr(sVs[i]), mkU8(size-1))
23129                      : size == 32 ? mkU32(0) : mkU64(0)
23130              ));
23131       }
23132 switch (size) {
23133 case 32:
23134 for (i = 0; i < 8; i++)
23135 putYMMRegLane32( rG, i, (i < 4 || isYMM)
23136 ? mkexpr(res[i]) : mkU32(0) );
23137 break;
23138 case 64:
23139 for (i = 0; i < 4; i++)
23140 putYMMRegLane64( rG, i, (i < 2 || isYMM)
23141 ? mkexpr(res[i]) : mkU64(0) );
23142 break;
23143 default: vassert(0);
23146 return delta;
23150 /* Vector by scalar shift of E into V, by an immediate byte. Modified
23151 version of dis_SSE_shiftE_imm. */
23152 static
23153 Long dis_AVX128_shiftE_to_V_imm( Prefix pfx,
23154 Long delta, const HChar* opname, IROp op )
23156 Bool shl, shr, sar;
23157 UChar rm = getUChar(delta);
23158 IRTemp e0 = newTemp(Ity_V128);
23159 IRTemp e1 = newTemp(Ity_V128);
23160 UInt rD = getVexNvvvv(pfx);
23161 UChar amt, size;
23162 vassert(epartIsReg(rm));
23163 vassert(gregLO3ofRM(rm) == 2
23164 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
23165 amt = getUChar(delta+1);
23166 delta += 2;
23167 DIP("%s $%d,%s,%s\n", opname,
23168 (Int)amt,
23169 nameXMMReg(eregOfRexRM(pfx,rm)),
23170 nameXMMReg(rD));
23171 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
23173 shl = shr = sar = False;
23174 size = 0;
23175 switch (op) {
23176 case Iop_ShlN16x8: shl = True; size = 16; break;
23177 case Iop_ShlN32x4: shl = True; size = 32; break;
23178 case Iop_ShlN64x2: shl = True; size = 64; break;
23179 case Iop_SarN16x8: sar = True; size = 16; break;
23180 case Iop_SarN32x4: sar = True; size = 32; break;
23181 case Iop_ShrN16x8: shr = True; size = 16; break;
23182 case Iop_ShrN32x4: shr = True; size = 32; break;
23183 case Iop_ShrN64x2: shr = True; size = 64; break;
23184 default: vassert(0);
23187 if (shl || shr) {
23188 assign( e1, amt >= size
23189 ? mkV128(0x0000)
23190 : binop(op, mkexpr(e0), mkU8(amt))
23192 } else
23193 if (sar) {
23194 assign( e1, amt >= size
23195 ? binop(op, mkexpr(e0), mkU8(size-1))
23196 : binop(op, mkexpr(e0), mkU8(amt))
23198 } else {
23199 vassert(0);
23202 putYMMRegLoAndZU( rD, mkexpr(e1) );
23203 return delta;
23207 /* Vector by scalar shift of E into V, by an immediate byte. Modified
23208 version of dis_AVX128_shiftE_to_V_imm. */
23209 static
23210 Long dis_AVX256_shiftE_to_V_imm( Prefix pfx,
23211 Long delta, const HChar* opname, IROp op )
23213 Bool shl, shr, sar;
23214 UChar rm = getUChar(delta);
23215 IRTemp e0 = newTemp(Ity_V256);
23216 IRTemp e1 = newTemp(Ity_V256);
23217 UInt rD = getVexNvvvv(pfx);
23218 UChar amt, size;
23219 vassert(epartIsReg(rm));
23220 vassert(gregLO3ofRM(rm) == 2
23221 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
23222 amt = getUChar(delta+1);
23223 delta += 2;
23224 DIP("%s $%d,%s,%s\n", opname,
23225 (Int)amt,
23226 nameYMMReg(eregOfRexRM(pfx,rm)),
23227 nameYMMReg(rD));
23228 assign( e0, getYMMReg(eregOfRexRM(pfx,rm)) );
23230 shl = shr = sar = False;
23231 size = 0;
23232 switch (op) {
23233 case Iop_ShlN16x16: shl = True; size = 16; break;
23234 case Iop_ShlN32x8: shl = True; size = 32; break;
23235 case Iop_ShlN64x4: shl = True; size = 64; break;
23236 case Iop_SarN16x16: sar = True; size = 16; break;
23237 case Iop_SarN32x8: sar = True; size = 32; break;
23238 case Iop_ShrN16x16: shr = True; size = 16; break;
23239 case Iop_ShrN32x8: shr = True; size = 32; break;
23240 case Iop_ShrN64x4: shr = True; size = 64; break;
23241 default: vassert(0);
23245 if (shl || shr) {
23246 assign( e1, amt >= size
23247 ? binop(Iop_V128HLtoV256, mkV128(0), mkV128(0))
23248 : binop(op, mkexpr(e0), mkU8(amt))
23250 } else
23251 if (sar) {
23252 assign( e1, amt >= size
23253 ? binop(op, mkexpr(e0), mkU8(size-1))
23254 : binop(op, mkexpr(e0), mkU8(amt))
23256 } else {
23257 vassert(0);
23260 putYMMReg( rD, mkexpr(e1) );
23261 return delta;
23265 /* Lower 64-bit lane only AVX128 binary operation:
23266 G[63:0] = V[63:0] `op` E[63:0]
23267 G[127:64] = V[127:64]
23268 G[255:128] = 0.
23269 The specified op must be of the 64F0x2 kind, so that it
23270    copies the upper half of the left operand to the result.
23271 */
23272 static Long dis_AVX128_E_V_to_G_lo64 ( /*OUT*/Bool* uses_vvvv,
23273 const VexAbiInfo* vbi,
23274 Prefix pfx, Long delta,
23275 const HChar* opname, IROp op )
23277 HChar dis_buf[50];
23278 Int alen;
23279 IRTemp addr;
23280 UChar rm = getUChar(delta);
23281 UInt rG = gregOfRexRM(pfx,rm);
23282 UInt rV = getVexNvvvv(pfx);
23283 IRExpr* vpart = getXMMReg(rV);
23284 if (epartIsReg(rm)) {
23285 UInt rE = eregOfRexRM(pfx,rm);
23286 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
23287 DIP("%s %s,%s,%s\n", opname,
23288 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23289 delta = delta+1;
23290 } else {
23291 /* We can only do a 64-bit memory read, so the upper half of the
23292 E operand needs to be made simply of zeroes. */
23293 IRTemp epart = newTemp(Ity_V128);
23294 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23295 assign( epart, unop( Iop_64UtoV128,
23296 loadLE(Ity_I64, mkexpr(addr))) );
23297 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
23298 DIP("%s %s,%s,%s\n", opname,
23299 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23300 delta = delta+alen;
23302 putYMMRegLane128( rG, 1, mkV128(0) );
23303 *uses_vvvv = True;
23304 return delta;
23308 /* Lower 64-bit lane only AVX128 unary operation:
23309 G[63:0] = op(E[63:0])
23310 G[127:64] = V[127:64]
23311 G[255:128] = 0
23312 The specified op must be of the 64F0x2 kind, so that it
23313    copies the upper half of the operand to the result.
23314 */
23315 static Long dis_AVX128_E_V_to_G_lo64_unary ( /*OUT*/Bool* uses_vvvv,
23316 const VexAbiInfo* vbi,
23317 Prefix pfx, Long delta,
23318 const HChar* opname, IROp op )
23320 HChar dis_buf[50];
23321 Int alen;
23322 IRTemp addr;
23323 UChar rm = getUChar(delta);
23324 UInt rG = gregOfRexRM(pfx,rm);
23325 UInt rV = getVexNvvvv(pfx);
23326 IRTemp e64 = newTemp(Ity_I64);
23328 /* Fetch E[63:0] */
23329 if (epartIsReg(rm)) {
23330 UInt rE = eregOfRexRM(pfx,rm);
23331 assign(e64, getXMMRegLane64(rE, 0));
23332 DIP("%s %s,%s,%s\n", opname,
23333 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23334 delta += 1;
23335 } else {
23336 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23337 assign(e64, loadLE(Ity_I64, mkexpr(addr)));
23338 DIP("%s %s,%s,%s\n", opname,
23339 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23340 delta += alen;
23343 /* Create a value 'arg' as V[127:64]++E[63:0] */
23344 IRTemp arg = newTemp(Ity_V128);
23345 assign(arg,
23346 binop(Iop_SetV128lo64,
23347 getXMMReg(rV), mkexpr(e64)));
23348 /* and apply op to it */
23349 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
23350 *uses_vvvv = True;
23351 return delta;
23355 /* Lower 32-bit lane only AVX128 unary operation:
23356 G[31:0] = op(E[31:0])
23357 G[127:32] = V[127:32]
23358 G[255:128] = 0
23359 The specified op must be of the 32F0x4 kind, so that it
23360    copies the upper 3/4 of the operand to the result.
23361 */
23362 static Long dis_AVX128_E_V_to_G_lo32_unary ( /*OUT*/Bool* uses_vvvv,
23363 const VexAbiInfo* vbi,
23364 Prefix pfx, Long delta,
23365 const HChar* opname, IROp op )
23367 HChar dis_buf[50];
23368 Int alen;
23369 IRTemp addr;
23370 UChar rm = getUChar(delta);
23371 UInt rG = gregOfRexRM(pfx,rm);
23372 UInt rV = getVexNvvvv(pfx);
23373 IRTemp e32 = newTemp(Ity_I32);
23375 /* Fetch E[31:0] */
23376 if (epartIsReg(rm)) {
23377 UInt rE = eregOfRexRM(pfx,rm);
23378 assign(e32, getXMMRegLane32(rE, 0));
23379 DIP("%s %s,%s,%s\n", opname,
23380 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23381 delta += 1;
23382 } else {
23383 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23384 assign(e32, loadLE(Ity_I32, mkexpr(addr)));
23385 DIP("%s %s,%s,%s\n", opname,
23386 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23387 delta += alen;
23390 /* Create a value 'arg' as V[127:32]++E[31:0] */
23391 IRTemp arg = newTemp(Ity_V128);
23392 assign(arg,
23393 binop(Iop_SetV128lo32,
23394 getXMMReg(rV), mkexpr(e32)));
23395 /* and apply op to it */
23396 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
23397 *uses_vvvv = True;
23398 return delta;
23402 /* Lower 32-bit lane only AVX128 binary operation:
23403 G[31:0] = V[31:0] `op` E[31:0]
23404 G[127:32] = V[127:32]
23405 G[255:128] = 0.
23406 The specified op must be of the 32F0x4 kind, so that it
23407    copies the upper 3/4 of the left operand to the result.
23408 */
23409 static Long dis_AVX128_E_V_to_G_lo32 ( /*OUT*/Bool* uses_vvvv,
23410 const VexAbiInfo* vbi,
23411 Prefix pfx, Long delta,
23412 const HChar* opname, IROp op )
23414 HChar dis_buf[50];
23415 Int alen;
23416 IRTemp addr;
23417 UChar rm = getUChar(delta);
23418 UInt rG = gregOfRexRM(pfx,rm);
23419 UInt rV = getVexNvvvv(pfx);
23420 IRExpr* vpart = getXMMReg(rV);
23421 if (epartIsReg(rm)) {
23422 UInt rE = eregOfRexRM(pfx,rm);
23423 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
23424 DIP("%s %s,%s,%s\n", opname,
23425 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23426 delta = delta+1;
23427 } else {
23428 /* We can only do a 32-bit memory read, so the upper 3/4 of the
23429 E operand needs to be made simply of zeroes. */
23430 IRTemp epart = newTemp(Ity_V128);
23431 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23432 assign( epart, unop( Iop_32UtoV128,
23433 loadLE(Ity_I32, mkexpr(addr))) );
23434 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
23435 DIP("%s %s,%s,%s\n", opname,
23436 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23437 delta = delta+alen;
23439 putYMMRegLane128( rG, 1, mkV128(0) );
23440 *uses_vvvv = True;
23441 return delta;
23445 /* All-lanes AVX128 binary operation:
23446 G[127:0] = V[127:0] `op` E[127:0]
23447       G[255:128] = 0.
23448 */
23449 static Long dis_AVX128_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
23450 const VexAbiInfo* vbi,
23451 Prefix pfx, Long delta,
23452 const HChar* opname, IROp op )
23454 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
23455 uses_vvvv, vbi, pfx, delta, opname, op,
23456 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
23461 /* Handles AVX128 32F/64F comparisons. A derivative of
23462 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
23463 original delta to indicate failure. */
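/* findSSECmpOp maps the imm8 predicate onto a base IR comparison plus two
   fixups: preSwap (swap the operands first, e.g. for "greater-than" style
   predicates) and postNot (invert the comparison result afterwards, e.g.
   for the NEQ/NLT/NLE forms). */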
23464 static
23465 Long dis_AVX128_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
23466 const VexAbiInfo* vbi,
23467 Prefix pfx, Long delta,
23468 const HChar* opname, Bool all_lanes, Int sz )
23470 vassert(sz == 4 || sz == 8);
23471 Long deltaIN = delta;
23472 HChar dis_buf[50];
23473 Int alen;
23474 UInt imm8;
23475 IRTemp addr;
23476 Bool preSwap = False;
23477 IROp op = Iop_INVALID;
23478 Bool postNot = False;
23479 IRTemp plain = newTemp(Ity_V128);
23480 UChar rm = getUChar(delta);
23481 UInt rG = gregOfRexRM(pfx, rm);
23482 UInt rV = getVexNvvvv(pfx);
23483 IRTemp argL = newTemp(Ity_V128);
23484 IRTemp argR = newTemp(Ity_V128);
23486 assign(argL, getXMMReg(rV));
23487 if (epartIsReg(rm)) {
23488 imm8 = getUChar(delta+1);
23489 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
23490 if (!ok) return deltaIN; /* FAIL */
23491 UInt rE = eregOfRexRM(pfx,rm);
23492 assign(argR, getXMMReg(rE));
23493 delta += 1+1;
23494 DIP("%s $%u,%s,%s,%s\n",
23495 opname, imm8,
23496 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23497 } else {
23498 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
23499 imm8 = getUChar(delta+alen);
23500 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
23501 if (!ok) return deltaIN; /* FAIL */
23502 assign(argR,
23503 all_lanes ? loadLE(Ity_V128, mkexpr(addr))
23504 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
23505 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))));
23506 delta += alen+1;
23507 DIP("%s $%u,%s,%s,%s\n",
23508 opname, imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23511 assign(plain, preSwap ? binop(op, mkexpr(argR), mkexpr(argL))
23512 : binop(op, mkexpr(argL), mkexpr(argR)));
23514 if (all_lanes) {
23515 /* This is simple: just invert the result, if necessary, and
23516 have done. */
23517 if (postNot) {
23518 putYMMRegLoAndZU( rG, unop(Iop_NotV128, mkexpr(plain)) );
23519 } else {
23520 putYMMRegLoAndZU( rG, mkexpr(plain) );
23523 else
23524 if (!preSwap) {
23525 /* More complex. It's a one-lane-only, hence need to possibly
23526 invert only that one lane. But at least the other lanes are
23527 correctly "in" the result, having been copied from the left
23528 operand (argL). */
23529 if (postNot) {
23530 IRExpr* mask = mkV128(sz==4 ? 0x000F : 0x00FF);
23531 putYMMRegLoAndZU( rG, binop(Iop_XorV128, mkexpr(plain),
23532 mask) );
23533 } else {
23534 putYMMRegLoAndZU( rG, mkexpr(plain) );
23537 else {
23538 /* This is the most complex case. One-lane-only, but the args
23539 were swapped. So we have to possibly invert the bottom lane,
23540 and (definitely) we have to copy the upper lane(s) from argL
23541 since, due to the swapping, what's currently there is from
23542 argR, which is not correct. */
23543 IRTemp res = newTemp(Ity_V128);
23544 IRTemp mask = newTemp(Ity_V128);
23545 IRTemp notMask = newTemp(Ity_V128);
23546 assign(mask, mkV128(sz==4 ? 0x000F : 0x00FF));
23547 assign(notMask, mkV128(sz==4 ? 0xFFF0 : 0xFF00));
23548 if (postNot) {
23549 assign(res,
23550 binop(Iop_OrV128,
23551 binop(Iop_AndV128,
23552 unop(Iop_NotV128, mkexpr(plain)),
23553 mkexpr(mask)),
23554 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
23555 } else {
23556 assign(res,
23557 binop(Iop_OrV128,
23558 binop(Iop_AndV128,
23559 mkexpr(plain),
23560 mkexpr(mask)),
23561 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
23563 putYMMRegLoAndZU( rG, mkexpr(res) );
23566 *uses_vvvv = True;
23567 return delta;
23571 /* Handles AVX256 32F/64F comparisons. A derivative of
23572 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
23573 original delta to indicate failure. */
23574 static
23575 Long dis_AVX256_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
23576 const VexAbiInfo* vbi,
23577 Prefix pfx, Long delta,
23578 const HChar* opname, Int sz )
23580 vassert(sz == 4 || sz == 8);
23581 Long deltaIN = delta;
23582 HChar dis_buf[50];
23583 Int alen;
23584 UInt imm8;
23585 IRTemp addr;
23586 Bool preSwap = False;
23587 IROp op = Iop_INVALID;
23588 Bool postNot = False;
23589 IRTemp plain = newTemp(Ity_V256);
23590 UChar rm = getUChar(delta);
23591 UInt rG = gregOfRexRM(pfx, rm);
23592 UInt rV = getVexNvvvv(pfx);
23593 IRTemp argL = newTemp(Ity_V256);
23594 IRTemp argR = newTemp(Ity_V256);
23595 IRTemp argLhi = IRTemp_INVALID;
23596 IRTemp argLlo = IRTemp_INVALID;
23597 IRTemp argRhi = IRTemp_INVALID;
23598 IRTemp argRlo = IRTemp_INVALID;
23600 assign(argL, getYMMReg(rV));
23601 if (epartIsReg(rm)) {
23602 imm8 = getUChar(delta+1);
23603 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8,
23604 True/*all_lanes*/, sz);
23605 if (!ok) return deltaIN; /* FAIL */
23606 UInt rE = eregOfRexRM(pfx,rm);
23607 assign(argR, getYMMReg(rE));
23608 delta += 1+1;
23609 DIP("%s $%u,%s,%s,%s\n",
23610 opname, imm8,
23611 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
23612 } else {
23613 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
23614 imm8 = getUChar(delta+alen);
23615 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8,
23616 True/*all_lanes*/, sz);
23617 if (!ok) return deltaIN; /* FAIL */
23618 assign(argR, loadLE(Ity_V256, mkexpr(addr)) );
23619 delta += alen+1;
23620 DIP("%s $%u,%s,%s,%s\n",
23621 opname, imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
23624 breakupV256toV128s( preSwap ? argR : argL, &argLhi, &argLlo );
23625 breakupV256toV128s( preSwap ? argL : argR, &argRhi, &argRlo );
23626 assign(plain, binop( Iop_V128HLtoV256,
23627 binop(op, mkexpr(argLhi), mkexpr(argRhi)),
23628 binop(op, mkexpr(argLlo), mkexpr(argRlo)) ) );
23630 /* This is simple: just invert the result, if necessary, and
23631 have done. */
23632 if (postNot) {
23633 putYMMReg( rG, unop(Iop_NotV256, mkexpr(plain)) );
23634 } else {
23635 putYMMReg( rG, mkexpr(plain) );
23638 *uses_vvvv = True;
23639 return delta;
23643 /* Handles AVX128 unary E-to-G all-lanes operations. */
23644 static
23645 Long dis_AVX128_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
23646 const VexAbiInfo* vbi,
23647 Prefix pfx, Long delta,
23648 const HChar* opname,
23649 IRTemp (*opFn)(IRTemp) )
23651 HChar dis_buf[50];
23652 Int alen;
23653 IRTemp addr;
23654 IRTemp res = newTemp(Ity_V128);
23655 IRTemp arg = newTemp(Ity_V128);
23656 UChar rm = getUChar(delta);
23657 UInt rG = gregOfRexRM(pfx, rm);
23658 if (epartIsReg(rm)) {
23659 UInt rE = eregOfRexRM(pfx,rm);
23660 assign(arg, getXMMReg(rE));
23661 delta += 1;
23662 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
23663 } else {
23664 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23665 assign(arg, loadLE(Ity_V128, mkexpr(addr)));
23666 delta += alen;
23667 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
23669 res = opFn(arg);
23670 putYMMRegLoAndZU( rG, mkexpr(res) );
23671 *uses_vvvv = False;
23672 return delta;
23676 /* Handles AVX128 unary E-to-G all-lanes operations. */
23677 static
23678 Long dis_AVX128_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
23679 const VexAbiInfo* vbi,
23680 Prefix pfx, Long delta,
23681 const HChar* opname, IROp op )
23683 HChar dis_buf[50];
23684 Int alen;
23685 IRTemp addr;
23686 IRTemp arg = newTemp(Ity_V128);
23687 UChar rm = getUChar(delta);
23688 UInt rG = gregOfRexRM(pfx, rm);
23689 if (epartIsReg(rm)) {
23690 UInt rE = eregOfRexRM(pfx,rm);
23691 assign(arg, getXMMReg(rE));
23692 delta += 1;
23693 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
23694 } else {
23695 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23696 assign(arg, loadLE(Ity_V128, mkexpr(addr)));
23697 delta += alen;
23698 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
23700 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
23701 // up in the usual way.
23702 Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
23703 /* XXXROUNDINGFIXME */
23704 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), mkexpr(arg))
23705 : unop(op, mkexpr(arg));
23706 putYMMRegLoAndZU( rG, res );
23707 *uses_vvvv = False;
23708 return delta;
23712 /* FIXME: common up with the _128_ version above? */
23713 static
23714 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG (
23715 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
23716 Prefix pfx, Long delta, const HChar* name,
23717 /* The actual operation. Use either 'op' or 'opfn',
23718 but not both. */
23719 IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
23720 Bool invertLeftArg,
23721 Bool swapArgs
23724 UChar modrm = getUChar(delta);
23725 UInt rD = gregOfRexRM(pfx, modrm);
23726 UInt rSL = getVexNvvvv(pfx);
23727 IRTemp tSL = newTemp(Ity_V256);
23728 IRTemp tSR = newTemp(Ity_V256);
23729 IRTemp addr = IRTemp_INVALID;
23730 HChar dis_buf[50];
23731 Int alen = 0;
23732 vassert(1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*WIG?*/);
23734 assign(tSL, invertLeftArg ? unop(Iop_NotV256, getYMMReg(rSL))
23735 : getYMMReg(rSL));
23737 if (epartIsReg(modrm)) {
23738 UInt rSR = eregOfRexRM(pfx, modrm);
23739 delta += 1;
23740 assign(tSR, getYMMReg(rSR));
23741 DIP("%s %s,%s,%s\n",
23742 name, nameYMMReg(rSR), nameYMMReg(rSL), nameYMMReg(rD));
23743 } else {
23744 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
23745 delta += alen;
23746 assign(tSR, loadLE(Ity_V256, mkexpr(addr)));
23747 DIP("%s %s,%s,%s\n",
23748 name, dis_buf, nameYMMReg(rSL), nameYMMReg(rD));
23751 IRTemp res = IRTemp_INVALID;
23752 if (op != Iop_INVALID) {
23753 vassert(opFn == NULL);
23754 res = newTemp(Ity_V256);
23755 if (requiresRMode(op)) {
23756 IRTemp rm = newTemp(Ity_I32);
23757 assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
23758 assign(res, swapArgs
23759 ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
23760 : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
23761 } else {
23762 assign(res, swapArgs
23763 ? binop(op, mkexpr(tSR), mkexpr(tSL))
23764 : binop(op, mkexpr(tSL), mkexpr(tSR)));
23766 } else {
23767 vassert(opFn != NULL);
23768 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
23771 putYMMReg(rD, mkexpr(res));
23773 *uses_vvvv = True;
23774 return delta;
23778 /* All-lanes AVX256 binary operation:
23779       G[255:0] = V[255:0] `op` E[255:0]
23780 */
23781 static Long dis_AVX256_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
23782 const VexAbiInfo* vbi,
23783 Prefix pfx, Long delta,
23784 const HChar* opname, IROp op )
23786 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23787 uses_vvvv, vbi, pfx, delta, opname, op,
23788 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
23793 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, with a simple IROp
23794 for the operation, no inversion of the left arg, and no swapping of
23795 args. */
23796 static
23797 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple (
23798 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
23799 Prefix pfx, Long delta, const HChar* name,
23800 IROp op
23803 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23804 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
23808 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, using the given IR
23809 generator to compute the result, no inversion of the left
23810 arg, and no swapping of args. */
23811 static
23812 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex (
23813 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
23814 Prefix pfx, Long delta, const HChar* name,
23815 IRTemp(*opFn)(IRTemp,IRTemp)
23818 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23819 uses_vvvv, vbi, pfx, delta, name,
23820 Iop_INVALID, opFn, False, False );
23824 /* Handles AVX256 unary E-to-G all-lanes operations. */
23825 static
23826 Long dis_AVX256_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
23827 const VexAbiInfo* vbi,
23828 Prefix pfx, Long delta,
23829 const HChar* opname,
23830 IRTemp (*opFn)(IRTemp) )
23832 HChar dis_buf[50];
23833 Int alen;
23834 IRTemp addr;
23835 IRTemp res = newTemp(Ity_V256);
23836 IRTemp arg = newTemp(Ity_V256);
23837 UChar rm = getUChar(delta);
23838 UInt rG = gregOfRexRM(pfx, rm);
23839 if (epartIsReg(rm)) {
23840 UInt rE = eregOfRexRM(pfx,rm);
23841 assign(arg, getYMMReg(rE));
23842 delta += 1;
23843 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
23844 } else {
23845 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23846 assign(arg, loadLE(Ity_V256, mkexpr(addr)));
23847 delta += alen;
23848 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
23850 res = opFn(arg);
23851 putYMMReg( rG, mkexpr(res) );
23852 *uses_vvvv = False;
23853 return delta;
23857 /* Handles AVX256 unary E-to-G all-lanes operations. */
23858 static
23859 Long dis_AVX256_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
23860 const VexAbiInfo* vbi,
23861 Prefix pfx, Long delta,
23862 const HChar* opname, IROp op )
23864 HChar dis_buf[50];
23865 Int alen;
23866 IRTemp addr;
23867 IRTemp arg = newTemp(Ity_V256);
23868 UChar rm = getUChar(delta);
23869 UInt rG = gregOfRexRM(pfx, rm);
23870 if (epartIsReg(rm)) {
23871 UInt rE = eregOfRexRM(pfx,rm);
23872 assign(arg, getYMMReg(rE));
23873 delta += 1;
23874 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
23875 } else {
23876 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23877 assign(arg, loadLE(Ity_V256, mkexpr(addr)));
23878 delta += alen;
23879 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
23881 putYMMReg( rG, unop(op, mkexpr(arg)) );
23882 *uses_vvvv = False;
23883 return delta;
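/* For example, the VSQRTPS ymm case in dis_ESC_0F__VEX below (opcode 0x51)
   uses this helper directly:

      delta = dis_AVX256_E_to_G_unary_all(
                 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx8 );
*/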
23887 /* The use of ReinterpF64asI64 is ugly. Surely could do better if we
23888 had a variant of Iop_64x4toV256 that took F64s as args instead. */
23889 static Long dis_CVTDQ2PD_256 ( const VexAbiInfo* vbi, Prefix pfx,
23890 Long delta )
23892 IRTemp addr = IRTemp_INVALID;
23893 Int alen = 0;
23894 HChar dis_buf[50];
23895 UChar modrm = getUChar(delta);
23896 IRTemp sV = newTemp(Ity_V128);
23897 UInt rG = gregOfRexRM(pfx,modrm);
23898 if (epartIsReg(modrm)) {
23899 UInt rE = eregOfRexRM(pfx,modrm);
23900 assign( sV, getXMMReg(rE) );
23901 delta += 1;
23902 DIP("vcvtdq2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
23903 } else {
23904 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23905 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
23906 delta += alen;
23907 DIP("vcvtdq2pd %s,%s\n", dis_buf, nameYMMReg(rG) );
23909 IRTemp s3, s2, s1, s0;
23910 s3 = s2 = s1 = s0 = IRTemp_INVALID;
23911 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
23912 IRExpr* res
23913 = IRExpr_Qop(
23914 Iop_64x4toV256,
23915 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s3))),
23916 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s2))),
23917 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s1))),
23918 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s0)))
23920 putYMMReg(rG, res);
23921 return delta;
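/* Lane-mapping sketch for the conversion above: the 128-bit source supplies
   four signed 32-bit ints s3..s0 (s3 in the most significant lane), and the
   256-bit result is assembled as

      dst[ 63:  0] = F64(s0)    dst[127: 64] = F64(s1)
      dst[191:128] = F64(s2)    dst[255:192] = F64(s3)

   Iop_I32StoF64 is exact, so no rounding mode is needed. */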
23925 static Long dis_CVTPD2PS_256 ( const VexAbiInfo* vbi, Prefix pfx,
23926 Long delta )
23928 IRTemp addr = IRTemp_INVALID;
23929 Int alen = 0;
23930 HChar dis_buf[50];
23931 UChar modrm = getUChar(delta);
23932 UInt rG = gregOfRexRM(pfx,modrm);
23933 IRTemp argV = newTemp(Ity_V256);
23934 IRTemp rmode = newTemp(Ity_I32);
23935 if (epartIsReg(modrm)) {
23936 UInt rE = eregOfRexRM(pfx,modrm);
23937 assign( argV, getYMMReg(rE) );
23938 delta += 1;
23939 DIP("vcvtpd2psy %s,%s\n", nameYMMReg(rE), nameXMMReg(rG));
23940 } else {
23941 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23942 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
23943 delta += alen;
23944 DIP("vcvtpd2psy %s,%s\n", dis_buf, nameXMMReg(rG) );
23947 assign( rmode, get_sse_roundingmode() );
23948 IRTemp t3, t2, t1, t0;
23949 t3 = t2 = t1 = t0 = IRTemp_INVALID;
23950 breakupV256to64s( argV, &t3, &t2, &t1, &t0 );
23951 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), \
23952 unop(Iop_ReinterpI64asF64, mkexpr(_t)) )
23953 putXMMRegLane32F( rG, 3, CVT(t3) );
23954 putXMMRegLane32F( rG, 2, CVT(t2) );
23955 putXMMRegLane32F( rG, 1, CVT(t1) );
23956 putXMMRegLane32F( rG, 0, CVT(t0) );
23957 # undef CVT
23958 putYMMRegLane128( rG, 1, mkV128(0) );
23959 return delta;
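/* Dataflow sketch for the case above: each of the four F64 lanes of the
   256-bit source is narrowed with Iop_F64toF32 under the guest's current
   SSE rounding mode and written to the corresponding F32 lane of the low
   half of the destination; the upper 128 bits are then zeroed, as the VEX
   encoding requires. */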
23963 static IRTemp math_VPUNPCK_YMM ( IRTemp tL, IRTemp tR, IROp op )
23965 IRTemp tLhi, tLlo, tRhi, tRlo;
23966 tLhi = tLlo = tRhi = tRlo = IRTemp_INVALID;
23967 IRTemp res = newTemp(Ity_V256);
23968 breakupV256toV128s( tL, &tLhi, &tLlo );
23969 breakupV256toV128s( tR, &tRhi, &tRlo );
23970 assign( res, binop( Iop_V128HLtoV256,
23971 binop( op, mkexpr(tRhi), mkexpr(tLhi) ),
23972 binop( op, mkexpr(tRlo), mkexpr(tLlo) ) ) );
23973 return res;
23977 static IRTemp math_VPUNPCKLBW_YMM ( IRTemp tL, IRTemp tR )
23979 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO8x16 );
23983 static IRTemp math_VPUNPCKLWD_YMM ( IRTemp tL, IRTemp tR )
23985 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO16x8 );
23989 static IRTemp math_VPUNPCKLDQ_YMM ( IRTemp tL, IRTemp tR )
23991 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO32x4 );
23995 static IRTemp math_VPUNPCKLQDQ_YMM ( IRTemp tL, IRTemp tR )
23997 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO64x2 );
24001 static IRTemp math_VPUNPCKHBW_YMM ( IRTemp tL, IRTemp tR )
24003 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI8x16 );
24007 static IRTemp math_VPUNPCKHWD_YMM ( IRTemp tL, IRTemp tR )
24009 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI16x8 );
24013 static IRTemp math_VPUNPCKHDQ_YMM ( IRTemp tL, IRTemp tR )
24015 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI32x4 );
24019 static IRTemp math_VPUNPCKHQDQ_YMM ( IRTemp tL, IRTemp tR )
24021 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI64x2 );
24025 static IRTemp math_VPACKSSWB_YMM ( IRTemp tL, IRTemp tR )
24027 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Sx16 );
24031 static IRTemp math_VPACKUSWB_YMM ( IRTemp tL, IRTemp tR )
24033 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Ux16 );
24037 static IRTemp math_VPACKSSDW_YMM ( IRTemp tL, IRTemp tR )
24039 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Sx8 );
24043 static IRTemp math_VPACKUSDW_YMM ( IRTemp tL, IRTemp tR )
24045 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Ux8 );
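/* Note: despite its name, math_VPUNPCK_YMM is simply a generic
   "apply a 128-bit binary IROp to each 128-bit half" combiner, which is
   why the VPACK* wrappers above reuse it with the QNarrowBin ops.
   Schematically:

      res[255:128] = op( tR[255:128], tL[255:128] )
      res[127:  0] = op( tR[127:  0], tL[127:  0] )
*/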
24049 __attribute__((noinline))
24050 static
24051 Long dis_ESC_0F__VEX (
24052 /*MB_OUT*/DisResult* dres,
24053 /*OUT*/ Bool* uses_vvvv,
24054 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
24055 Bool resteerCisOk,
24056 void* callback_opaque,
24057 const VexArchInfo* archinfo,
24058 const VexAbiInfo* vbi,
24059 Prefix pfx, Int sz, Long deltaIN
24062 IRTemp addr = IRTemp_INVALID;
24063 Int alen = 0;
24064 HChar dis_buf[50];
24065 Long delta = deltaIN;
24066 UChar opc = getUChar(delta);
24067 delta++;
24068 *uses_vvvv = False;
24070 switch (opc) {
24072 case 0x10:
24073 /* VMOVSD m64, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
24074 /* Move 64 bits from E (mem only) to G (lo half xmm).
24075 Bits 255-64 of the dest are zeroed out. */
24076 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
24077 UChar modrm = getUChar(delta);
24078 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24079 UInt rG = gregOfRexRM(pfx,modrm);
24080 IRTemp z128 = newTemp(Ity_V128);
24081 assign(z128, mkV128(0));
24082 putXMMReg( rG, mkexpr(z128) );
24083 /* FIXME: ALIGNMENT CHECK? */
24084 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
24085 putYMMRegLane128( rG, 1, mkexpr(z128) );
24086 DIP("vmovsd %s,%s\n", dis_buf, nameXMMReg(rG));
24087 delta += alen;
24088 goto decode_success;
24090 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
24091 /* Reg form. */
24092 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
24093 UChar modrm = getUChar(delta);
24094 UInt rG = gregOfRexRM(pfx, modrm);
24095 UInt rE = eregOfRexRM(pfx, modrm);
24096 UInt rV = getVexNvvvv(pfx);
24097 delta++;
24098 DIP("vmovsd %s,%s,%s\n",
24099 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24100 IRTemp res = newTemp(Ity_V128);
24101 assign(res, binop(Iop_64HLtoV128,
24102 getXMMRegLane64(rV, 1),
24103 getXMMRegLane64(rE, 0)));
24104 putYMMRegLoAndZU(rG, mkexpr(res));
24105 *uses_vvvv = True;
24106 goto decode_success;
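/* Merge-semantics sketch for the reg-reg form just above:

      dst[ 63:  0] = xmm3[63:0]      (rE)
      dst[127: 64] = xmm2[127:64]    (rV)
      dst[255:128] = 0

   i.e. the low double comes from the E register, the high double from the
   vvvv register, and the upper YMM lane is zeroed. */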
24108 /* VMOVSS m32, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
24109 /* Move 32 bits from E (mem only) to G (lo 1/4 xmm).
24110 Bits 255-32 of the dest are zeroed out. */
24111 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
24112 UChar modrm = getUChar(delta);
24113 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24114 UInt rG = gregOfRexRM(pfx,modrm);
24115 IRTemp z128 = newTemp(Ity_V128);
24116 assign(z128, mkV128(0));
24117 putXMMReg( rG, mkexpr(z128) );
24118 /* FIXME: ALIGNMENT CHECK? */
24119 putXMMRegLane32( rG, 0, loadLE(Ity_I32, mkexpr(addr)) );
24120 putYMMRegLane128( rG, 1, mkexpr(z128) );
24121 DIP("vmovss %s,%s\n", dis_buf, nameXMMReg(rG));
24122 delta += alen;
24123 goto decode_success;
24125 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
24126 /* Reg form. */
24127 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
24128 UChar modrm = getUChar(delta);
24129 UInt rG = gregOfRexRM(pfx, modrm);
24130 UInt rE = eregOfRexRM(pfx, modrm);
24131 UInt rV = getVexNvvvv(pfx);
24132 delta++;
24133 DIP("vmovss %s,%s,%s\n",
24134 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24135 IRTemp res = newTemp(Ity_V128);
24136 assign( res, binop( Iop_64HLtoV128,
24137 getXMMRegLane64(rV, 1),
24138 binop(Iop_32HLto64,
24139 getXMMRegLane32(rV, 1),
24140 getXMMRegLane32(rE, 0)) ) );
24141 putYMMRegLoAndZU(rG, mkexpr(res));
24142 *uses_vvvv = True;
24143 goto decode_success;
24145 /* VMOVUPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 10 /r */
24146 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24147 UChar modrm = getUChar(delta);
24148 UInt rG = gregOfRexRM(pfx, modrm);
24149 if (epartIsReg(modrm)) {
24150 UInt rE = eregOfRexRM(pfx,modrm);
24151 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24152 DIP("vmovupd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24153 delta += 1;
24154 } else {
24155 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24156 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24157 DIP("vmovupd %s,%s\n", dis_buf, nameXMMReg(rG));
24158 delta += alen;
24160 goto decode_success;
24162 /* VMOVUPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 10 /r */
24163 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24164 UChar modrm = getUChar(delta);
24165 UInt rG = gregOfRexRM(pfx, modrm);
24166 if (epartIsReg(modrm)) {
24167 UInt rE = eregOfRexRM(pfx,modrm);
24168 putYMMReg( rG, getYMMReg( rE ));
24169 DIP("vmovupd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24170 delta += 1;
24171 } else {
24172 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24173 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24174 DIP("vmovupd %s,%s\n", dis_buf, nameYMMReg(rG));
24175 delta += alen;
24177 goto decode_success;
24179 /* VMOVUPS xmm2/m128, xmm1 = VEX.128.0F.WIG 10 /r */
24180 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24181 UChar modrm = getUChar(delta);
24182 UInt rG = gregOfRexRM(pfx, modrm);
24183 if (epartIsReg(modrm)) {
24184 UInt rE = eregOfRexRM(pfx,modrm);
24185 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24186 DIP("vmovups %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24187 delta += 1;
24188 } else {
24189 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24190 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24191 DIP("vmovups %s,%s\n", dis_buf, nameXMMReg(rG));
24192 delta += alen;
24194 goto decode_success;
24196 /* VMOVUPS ymm2/m256, ymm1 = VEX.256.0F.WIG 10 /r */
24197 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24198 UChar modrm = getUChar(delta);
24199 UInt rG = gregOfRexRM(pfx, modrm);
24200 if (epartIsReg(modrm)) {
24201 UInt rE = eregOfRexRM(pfx,modrm);
24202 putYMMReg( rG, getYMMReg( rE ));
24203 DIP("vmovups %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24204 delta += 1;
24205 } else {
24206 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24207 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24208 DIP("vmovups %s,%s\n", dis_buf, nameYMMReg(rG));
24209 delta += alen;
24211 goto decode_success;
24213 break;
24215 case 0x11:
24216 /* VMOVSD xmm1, m64 = VEX.LIG.F2.0F.WIG 11 /r */
24217 /* Move 64 bits from G (low half xmm) to mem only. */
24218 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
24219 UChar modrm = getUChar(delta);
24220 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24221 UInt rG = gregOfRexRM(pfx,modrm);
24222 /* FIXME: ALIGNMENT CHECK? */
24223 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0));
24224 DIP("vmovsd %s,%s\n", nameXMMReg(rG), dis_buf);
24225 delta += alen;
24226 goto decode_success;
24228 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 11 /r */
24229 /* Reg form. */
24230 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
24231 UChar modrm = getUChar(delta);
24232 UInt rG = gregOfRexRM(pfx, modrm);
24233 UInt rE = eregOfRexRM(pfx, modrm);
24234 UInt rV = getVexNvvvv(pfx);
24235 delta++;
24236 DIP("vmovsd %s,%s,%s\n",
24237 nameXMMReg(rG), nameXMMReg(rV), nameXMMReg(rE));
24238 IRTemp res = newTemp(Ity_V128);
24239 assign(res, binop(Iop_64HLtoV128,
24240 getXMMRegLane64(rV, 1),
24241 getXMMRegLane64(rG, 0)));
24242 putYMMRegLoAndZU(rE, mkexpr(res));
24243 *uses_vvvv = True;
24244 goto decode_success;
24246 /* VMOVSS xmm1, m32 = VEX.LIG.F3.0F.WIG 11 /r */
24247 /* Move 32 bits from G (low 1/4 xmm) to mem only. */
24248 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
24249 UChar modrm = getUChar(delta);
24250 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24251 UInt rG = gregOfRexRM(pfx,modrm);
24252 /* FIXME: ALIGNMENT CHECK? */
24253 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0));
24254 DIP("vmovss %s,%s\n", nameXMMReg(rG), dis_buf);
24255 delta += alen;
24256 goto decode_success;
24258 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 11 /r */
24259 /* Reg form. */
24260 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
24261 UChar modrm = getUChar(delta);
24262 UInt rG = gregOfRexRM(pfx, modrm);
24263 UInt rE = eregOfRexRM(pfx, modrm);
24264 UInt rV = getVexNvvvv(pfx);
24265 delta++;
24266 DIP("vmovss %s,%s,%s\n",
24267 nameXMMReg(rG), nameXMMReg(rV), nameXMMReg(rE));
24268 IRTemp res = newTemp(Ity_V128);
24269 assign( res, binop( Iop_64HLtoV128,
24270 getXMMRegLane64(rV, 1),
24271 binop(Iop_32HLto64,
24272 getXMMRegLane32(rV, 1),
24273 getXMMRegLane32(rG, 0)) ) );
24274 putYMMRegLoAndZU(rE, mkexpr(res));
24275 *uses_vvvv = True;
24276 goto decode_success;
24278 /* VMOVUPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 11 /r */
24279 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24280 UChar modrm = getUChar(delta);
24281 UInt rG = gregOfRexRM(pfx,modrm);
24282 if (epartIsReg(modrm)) {
24283 UInt rE = eregOfRexRM(pfx,modrm);
24284 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24285 DIP("vmovupd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24286 delta += 1;
24287 } else {
24288 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24289 storeLE( mkexpr(addr), getXMMReg(rG) );
24290 DIP("vmovupd %s,%s\n", nameXMMReg(rG), dis_buf);
24291 delta += alen;
24293 goto decode_success;
24295 /* VMOVUPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 11 /r */
24296 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24297 UChar modrm = getUChar(delta);
24298 UInt rG = gregOfRexRM(pfx,modrm);
24299 if (epartIsReg(modrm)) {
24300 UInt rE = eregOfRexRM(pfx,modrm);
24301 putYMMReg( rE, getYMMReg(rG) );
24302 DIP("vmovupd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24303 delta += 1;
24304 } else {
24305 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24306 storeLE( mkexpr(addr), getYMMReg(rG) );
24307 DIP("vmovupd %s,%s\n", nameYMMReg(rG), dis_buf);
24308 delta += alen;
24310 goto decode_success;
24312 /* VMOVUPS xmm1, xmm2/m128 = VEX.128.0F.WIG 11 /r */
24313 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24314 UChar modrm = getUChar(delta);
24315 UInt rG = gregOfRexRM(pfx,modrm);
24316 if (epartIsReg(modrm)) {
24317 UInt rE = eregOfRexRM(pfx,modrm);
24318 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24319 DIP("vmovups %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24320 delta += 1;
24321 } else {
24322 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24323 storeLE( mkexpr(addr), getXMMReg(rG) );
24324 DIP("vmovups %s,%s\n", nameXMMReg(rG), dis_buf);
24325 delta += alen;
24327 goto decode_success;
24329 /* VMOVUPS ymm1, ymm2/m256 = VEX.256.0F.WIG 11 /r */
24330 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24331 UChar modrm = getUChar(delta);
24332 UInt rG = gregOfRexRM(pfx,modrm);
24333 if (epartIsReg(modrm)) {
24334 UInt rE = eregOfRexRM(pfx,modrm);
24335 putYMMReg( rE, getYMMReg(rG) );
24336 DIP("vmovups %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24337 delta += 1;
24338 } else {
24339 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24340 storeLE( mkexpr(addr), getYMMReg(rG) );
24341 DIP("vmovups %s,%s\n", nameYMMReg(rG), dis_buf);
24342 delta += alen;
24344 goto decode_success;
24346 break;
24348 case 0x12:
24349 /* VMOVDDUP xmm2/m64, xmm1 = VEX.128.F2.0F.WIG 12 /r */
24350 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24351 delta = dis_MOVDDUP_128( vbi, pfx, delta, True/*isAvx*/ );
24352 goto decode_success;
24354 /* VMOVDDUP ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 12 /r */
24355 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24356 delta = dis_MOVDDUP_256( vbi, pfx, delta );
24357 goto decode_success;
24359 /* VMOVHLPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 12 /r */
24360 /* Insn only exists in reg form */
24361 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
24362 && epartIsReg(getUChar(delta))) {
24363 UChar modrm = getUChar(delta);
24364 UInt rG = gregOfRexRM(pfx, modrm);
24365 UInt rE = eregOfRexRM(pfx, modrm);
24366 UInt rV = getVexNvvvv(pfx);
24367 delta++;
24368 DIP("vmovhlps %s,%s,%s\n",
24369 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24370 IRTemp res = newTemp(Ity_V128);
24371 assign(res, binop(Iop_64HLtoV128,
24372 getXMMRegLane64(rV, 1),
24373 getXMMRegLane64(rE, 1)));
24374 putYMMRegLoAndZU(rG, mkexpr(res));
24375 *uses_vvvv = True;
24376 goto decode_success;
24378 /* VMOVLPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 12 /r */
24379 /* Insn exists only in mem form, it appears. */
24380 /* VMOVLPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 12 /r */
24381 /* Insn exists only in mem form, it appears. */
24382 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24383 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24384 UChar modrm = getUChar(delta);
24385 UInt rG = gregOfRexRM(pfx, modrm);
24386 UInt rV = getVexNvvvv(pfx);
24387 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24388 delta += alen;
24389 DIP("vmovlpd %s,%s,%s\n",
24390 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
24391 IRTemp res = newTemp(Ity_V128);
24392 assign(res, binop(Iop_64HLtoV128,
24393 getXMMRegLane64(rV, 1),
24394 loadLE(Ity_I64, mkexpr(addr))));
24395 putYMMRegLoAndZU(rG, mkexpr(res));
24396 *uses_vvvv = True;
24397 goto decode_success;
24399 /* VMOVSLDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 12 /r */
24400 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
24401 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
24402 True/*isL*/ );
24403 goto decode_success;
24405 /* VMOVSLDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 12 /r */
24406 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
24407 delta = dis_MOVSxDUP_256( vbi, pfx, delta, True/*isL*/ );
24408 goto decode_success;
24410 break;
24412 case 0x13:
24413 /* VMOVLPS xmm1, m64 = VEX.128.0F.WIG 13 /r */
24414 /* Insn exists only in mem form, it appears. */
24415 /* VMOVLPD xmm1, m64 = VEX.128.66.0F.WIG 13 /r */
24416 /* Insn exists only in mem form, it appears. */
24417 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24418 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24419 UChar modrm = getUChar(delta);
24420 UInt rG = gregOfRexRM(pfx, modrm);
24421 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24422 delta += alen;
24423 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0));
24424 DIP("vmovlpd %s,%s\n", nameXMMReg(rG), dis_buf);
24425 goto decode_success;
24427 break;
24429 case 0x14:
24430 case 0x15:
24431 /* VUNPCKLPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 14 /r */
24432 /* VUNPCKHPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 15 /r */
24433 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24434 Bool hi = opc == 0x15;
24435 UChar modrm = getUChar(delta);
24436 UInt rG = gregOfRexRM(pfx,modrm);
24437 UInt rV = getVexNvvvv(pfx);
24438 IRTemp eV = newTemp(Ity_V128);
24439 IRTemp vV = newTemp(Ity_V128);
24440 assign( vV, getXMMReg(rV) );
24441 if (epartIsReg(modrm)) {
24442 UInt rE = eregOfRexRM(pfx,modrm);
24443 assign( eV, getXMMReg(rE) );
24444 delta += 1;
24445 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24446 nameXMMReg(rE), nameXMMReg(rG));
24447 } else {
24448 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24449 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
24450 delta += alen;
24451 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24452 dis_buf, nameXMMReg(rG));
24454 IRTemp res = math_UNPCKxPS_128( eV, vV, hi );
24455 putYMMRegLoAndZU( rG, mkexpr(res) );
24456 *uses_vvvv = True;
24457 goto decode_success;
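/* Architecturally, the 128-bit form above produces (low-to-high 32-bit
   lanes):

      VUNPCKLPS: dst = { V[0], E[0], V[1], E[1] }
      VUNPCKHPS: dst = { V[2], E[2], V[3], E[3] }

   with the upper 128 bits of the destination zeroed. */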
24459 /* VUNPCKLPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 14 /r */
24460 /* VUNPCKHPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 15 /r */
24461 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24462 Bool hi = opc == 0x15;
24463 UChar modrm = getUChar(delta);
24464 UInt rG = gregOfRexRM(pfx,modrm);
24465 UInt rV = getVexNvvvv(pfx);
24466 IRTemp eV = newTemp(Ity_V256);
24467 IRTemp vV = newTemp(Ity_V256);
24468 assign( vV, getYMMReg(rV) );
24469 if (epartIsReg(modrm)) {
24470 UInt rE = eregOfRexRM(pfx,modrm);
24471 assign( eV, getYMMReg(rE) );
24472 delta += 1;
24473 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24474 nameYMMReg(rE), nameYMMReg(rG));
24475 } else {
24476 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24477 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
24478 delta += alen;
24479 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24480 dis_buf, nameYMMReg(rG));
24482 IRTemp res = math_UNPCKxPS_256( eV, vV, hi );
24483 putYMMReg( rG, mkexpr(res) );
24484 *uses_vvvv = True;
24485 goto decode_success;
24487 /* VUNPCKLPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 14 /r */
24488 /* VUNPCKHPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 15 /r */
24489 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24490 Bool hi = opc == 0x15;
24491 UChar modrm = getUChar(delta);
24492 UInt rG = gregOfRexRM(pfx,modrm);
24493 UInt rV = getVexNvvvv(pfx);
24494 IRTemp eV = newTemp(Ity_V128);
24495 IRTemp vV = newTemp(Ity_V128);
24496 assign( vV, getXMMReg(rV) );
24497 if (epartIsReg(modrm)) {
24498 UInt rE = eregOfRexRM(pfx,modrm);
24499 assign( eV, getXMMReg(rE) );
24500 delta += 1;
24501 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24502 nameXMMReg(rE), nameXMMReg(rG));
24503 } else {
24504 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24505 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
24506 delta += alen;
24507 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24508 dis_buf, nameXMMReg(rG));
24510 IRTemp res = math_UNPCKxPD_128( eV, vV, hi );
24511 putYMMRegLoAndZU( rG, mkexpr(res) );
24512 *uses_vvvv = True;
24513 goto decode_success;
24515 /* VUNPCKLPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 14 /r */
24516 /* VUNPCKHPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 15 /r */
24517 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24518 Bool hi = opc == 0x15;
24519 UChar modrm = getUChar(delta);
24520 UInt rG = gregOfRexRM(pfx,modrm);
24521 UInt rV = getVexNvvvv(pfx);
24522 IRTemp eV = newTemp(Ity_V256);
24523 IRTemp vV = newTemp(Ity_V256);
24524 assign( vV, getYMMReg(rV) );
24525 if (epartIsReg(modrm)) {
24526 UInt rE = eregOfRexRM(pfx,modrm);
24527 assign( eV, getYMMReg(rE) );
24528 delta += 1;
24529 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24530 nameYMMReg(rE), nameYMMReg(rG));
24531 } else {
24532 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24533 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
24534 delta += alen;
24535 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24536 dis_buf, nameYMMReg(rG));
24538 IRTemp res = math_UNPCKxPD_256( eV, vV, hi );
24539 putYMMReg( rG, mkexpr(res) );
24540 *uses_vvvv = True;
24541 goto decode_success;
24543 break;
24545 case 0x16:
24546 /* VMOVLHPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 16 /r */
24547 /* Insn only exists in reg form */
24548 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
24549 && epartIsReg(getUChar(delta))) {
24550 UChar modrm = getUChar(delta);
24551 UInt rG = gregOfRexRM(pfx, modrm);
24552 UInt rE = eregOfRexRM(pfx, modrm);
24553 UInt rV = getVexNvvvv(pfx);
24554 delta++;
24555 DIP("vmovlhps %s,%s,%s\n",
24556 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24557 IRTemp res = newTemp(Ity_V128);
24558 assign(res, binop(Iop_64HLtoV128,
24559 getXMMRegLane64(rE, 0),
24560 getXMMRegLane64(rV, 0)));
24561 putYMMRegLoAndZU(rG, mkexpr(res));
24562 *uses_vvvv = True;
24563 goto decode_success;
24565 /* VMOVHPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 16 /r */
24566 /* Insn exists only in mem form, it appears. */
24567 /* VMOVHPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 16 /r */
24568 /* Insn exists only in mem form, it appears. */
24569 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24570 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24571 UChar modrm = getUChar(delta);
24572 UInt rG = gregOfRexRM(pfx, modrm);
24573 UInt rV = getVexNvvvv(pfx);
24574 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24575 delta += alen;
24576 DIP("vmovhp%c %s,%s,%s\n", have66(pfx) ? 'd' : 's',
24577 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
24578 IRTemp res = newTemp(Ity_V128);
24579 assign(res, binop(Iop_64HLtoV128,
24580 loadLE(Ity_I64, mkexpr(addr)),
24581 getXMMRegLane64(rV, 0)));
24582 putYMMRegLoAndZU(rG, mkexpr(res));
24583 *uses_vvvv = True;
24584 goto decode_success;
24586 /* VMOVSHDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 16 /r */
24587 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
24588 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
24589 False/*!isL*/ );
24590 goto decode_success;
24592 /* VMOVSHDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 16 /r */
24593 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
24594 delta = dis_MOVSxDUP_256( vbi, pfx, delta, False/*!isL*/ );
24595 goto decode_success;
24597 break;
24599 case 0x17:
24600 /* VMOVHPS xmm1, m64 = VEX.128.0F.WIG 17 /r */
24601 /* Insn exists only in mem form, it appears. */
24602 /* VMOVHPD xmm1, m64 = VEX.128.66.0F.WIG 17 /r */
24603 /* Insn exists only in mem form, it appears. */
24604 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24605 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24606 UChar modrm = getUChar(delta);
24607 UInt rG = gregOfRexRM(pfx, modrm);
24608 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24609 delta += alen;
24610 storeLE( mkexpr(addr), getXMMRegLane64( rG, 1));
24611 DIP("vmovhp%c %s,%s\n", have66(pfx) ? 'd' : 's',
24612 nameXMMReg(rG), dis_buf);
24613 goto decode_success;
24615 break;
24617 case 0x28:
24618 /* VMOVAPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 28 /r */
24619 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24620 UChar modrm = getUChar(delta);
24621 UInt rG = gregOfRexRM(pfx, modrm);
24622 if (epartIsReg(modrm)) {
24623 UInt rE = eregOfRexRM(pfx,modrm);
24624 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24625 DIP("vmovapd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24626 delta += 1;
24627 } else {
24628 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24629 gen_SEGV_if_not_16_aligned( addr );
24630 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24631 DIP("vmovapd %s,%s\n", dis_buf, nameXMMReg(rG));
24632 delta += alen;
24634 goto decode_success;
24636 /* VMOVAPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 28 /r */
24637 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24638 UChar modrm = getUChar(delta);
24639 UInt rG = gregOfRexRM(pfx, modrm);
24640 if (epartIsReg(modrm)) {
24641 UInt rE = eregOfRexRM(pfx,modrm);
24642 putYMMReg( rG, getYMMReg( rE ));
24643 DIP("vmovapd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24644 delta += 1;
24645 } else {
24646 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24647 gen_SEGV_if_not_32_aligned( addr );
24648 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24649 DIP("vmovapd %s,%s\n", dis_buf, nameYMMReg(rG));
24650 delta += alen;
24652 goto decode_success;
24654 /* VMOVAPS xmm2/m128, xmm1 = VEX.128.0F.WIG 28 /r */
24655 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24656 UChar modrm = getUChar(delta);
24657 UInt rG = gregOfRexRM(pfx, modrm);
24658 if (epartIsReg(modrm)) {
24659 UInt rE = eregOfRexRM(pfx,modrm);
24660 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24661 DIP("vmovaps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24662 delta += 1;
24663 } else {
24664 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24665 gen_SEGV_if_not_16_aligned( addr );
24666 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24667 DIP("vmovaps %s,%s\n", dis_buf, nameXMMReg(rG));
24668 delta += alen;
24670 goto decode_success;
24672 /* VMOVAPS ymm2/m256, ymm1 = VEX.256.0F.WIG 28 /r */
24673 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24674 UChar modrm = getUChar(delta);
24675 UInt rG = gregOfRexRM(pfx, modrm);
24676 if (epartIsReg(modrm)) {
24677 UInt rE = eregOfRexRM(pfx,modrm);
24678 putYMMReg( rG, getYMMReg( rE ));
24679 DIP("vmovaps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24680 delta += 1;
24681 } else {
24682 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24683 gen_SEGV_if_not_32_aligned( addr );
24684 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24685 DIP("vmovaps %s,%s\n", dis_buf, nameYMMReg(rG));
24686 delta += alen;
24688 goto decode_success;
24690 break;
24692 case 0x29:
24693 /* VMOVAPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 29 /r */
24694 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24695 UChar modrm = getUChar(delta);
24696 UInt rG = gregOfRexRM(pfx,modrm);
24697 if (epartIsReg(modrm)) {
24698 UInt rE = eregOfRexRM(pfx,modrm);
24699 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24700 DIP("vmovapd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24701 delta += 1;
24702 } else {
24703 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24704 gen_SEGV_if_not_16_aligned( addr );
24705 storeLE( mkexpr(addr), getXMMReg(rG) );
24706 DIP("vmovapd %s,%s\n", nameXMMReg(rG), dis_buf );
24707 delta += alen;
24709 goto decode_success;
24711 /* VMOVAPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 29 /r */
24712 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24713 UChar modrm = getUChar(delta);
24714 UInt rG = gregOfRexRM(pfx,modrm);
24715 if (epartIsReg(modrm)) {
24716 UInt rE = eregOfRexRM(pfx,modrm);
24717 putYMMReg( rE, getYMMReg(rG) );
24718 DIP("vmovapd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24719 delta += 1;
24720 } else {
24721 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24722 gen_SEGV_if_not_32_aligned( addr );
24723 storeLE( mkexpr(addr), getYMMReg(rG) );
24724 DIP("vmovapd %s,%s\n", nameYMMReg(rG), dis_buf );
24725 delta += alen;
24727 goto decode_success;
24729 /* VMOVAPS xmm1, xmm2/m128 = VEX.128.0F.WIG 29 /r */
24730 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24731 UChar modrm = getUChar(delta);
24732 UInt rG = gregOfRexRM(pfx,modrm);
24733 if (epartIsReg(modrm)) {
24734 UInt rE = eregOfRexRM(pfx,modrm);
24735 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24736 DIP("vmovaps %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24737 delta += 1;
24738 goto decode_success;
24739 } else {
24740 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24741 gen_SEGV_if_not_16_aligned( addr );
24742 storeLE( mkexpr(addr), getXMMReg(rG) );
24743 DIP("vmovaps %s,%s\n", nameXMMReg(rG), dis_buf );
24744 delta += alen;
24745 goto decode_success;
24748 /* VMOVAPS ymm1, ymm2/m256 = VEX.256.0F.WIG 29 /r */
24749 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24750 UChar modrm = getUChar(delta);
24751 UInt rG = gregOfRexRM(pfx,modrm);
24752 if (epartIsReg(modrm)) {
24753 UInt rE = eregOfRexRM(pfx,modrm);
24754 putYMMReg( rE, getYMMReg(rG) );
24755 DIP("vmovaps %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24756 delta += 1;
24757 goto decode_success;
24758 } else {
24759 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24760 gen_SEGV_if_not_32_aligned( addr );
24761 storeLE( mkexpr(addr), getYMMReg(rG) );
24762 DIP("vmovaps %s,%s\n", nameYMMReg(rG), dis_buf );
24763 delta += alen;
24764 goto decode_success;
24767 break;
24769 case 0x2A: {
24770 IRTemp rmode = newTemp(Ity_I32);
24771 assign( rmode, get_sse_roundingmode() );
24772 /* VCVTSI2SD r/m32, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W0 2A /r */
24773 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
24774 UChar modrm = getUChar(delta);
24775 UInt rV = getVexNvvvv(pfx);
24776 UInt rD = gregOfRexRM(pfx, modrm);
24777 IRTemp arg32 = newTemp(Ity_I32);
24778 if (epartIsReg(modrm)) {
24779 UInt rS = eregOfRexRM(pfx,modrm);
24780 assign( arg32, getIReg32(rS) );
24781 delta += 1;
24782 DIP("vcvtsi2sdl %s,%s,%s\n",
24783 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
24784 } else {
24785 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24786 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
24787 delta += alen;
24788 DIP("vcvtsi2sdl %s,%s,%s\n",
24789 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24791 putXMMRegLane64F( rD, 0,
24792 unop(Iop_I32StoF64, mkexpr(arg32)));
24793 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24794 putYMMRegLane128( rD, 1, mkV128(0) );
24795 *uses_vvvv = True;
24796 goto decode_success;
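/* No rounding mode is supplied to Iop_I32StoF64 above because every signed
   32-bit integer is exactly representable as an F64; contrast the W1
   (64-bit source) variant below, which must round via Iop_I64StoF64. */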
24798 /* VCVTSI2SD r/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W1 2A /r */
24799 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
24800 UChar modrm = getUChar(delta);
24801 UInt rV = getVexNvvvv(pfx);
24802 UInt rD = gregOfRexRM(pfx, modrm);
24803 IRTemp arg64 = newTemp(Ity_I64);
24804 if (epartIsReg(modrm)) {
24805 UInt rS = eregOfRexRM(pfx,modrm);
24806 assign( arg64, getIReg64(rS) );
24807 delta += 1;
24808 DIP("vcvtsi2sdq %s,%s,%s\n",
24809 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
24810 } else {
24811 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24812 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
24813 delta += alen;
24814 DIP("vcvtsi2sdq %s,%s,%s\n",
24815 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24817 putXMMRegLane64F( rD, 0,
24818 binop( Iop_I64StoF64,
24819 get_sse_roundingmode(),
24820 mkexpr(arg64)) );
24821 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24822 putYMMRegLane128( rD, 1, mkV128(0) );
24823 *uses_vvvv = True;
24824 goto decode_success;
24826 /* VCVTSI2SS r/m64, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W1 2A /r */
24827 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
24828 UChar modrm = getUChar(delta);
24829 UInt rV = getVexNvvvv(pfx);
24830 UInt rD = gregOfRexRM(pfx, modrm);
24831 IRTemp arg64 = newTemp(Ity_I64);
24832 if (epartIsReg(modrm)) {
24833 UInt rS = eregOfRexRM(pfx,modrm);
24834 assign( arg64, getIReg64(rS) );
24835 delta += 1;
24836 DIP("vcvtsi2ssq %s,%s,%s\n",
24837 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
24838 } else {
24839 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24840 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
24841 delta += alen;
24842 DIP("vcvtsi2ssq %s,%s,%s\n",
24843 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24845 putXMMRegLane32F( rD, 0,
24846 binop(Iop_F64toF32,
24847 mkexpr(rmode),
24848 binop(Iop_I64StoF64, mkexpr(rmode),
24849 mkexpr(arg64)) ) );
24850 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
24851 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24852 putYMMRegLane128( rD, 1, mkV128(0) );
24853 *uses_vvvv = True;
24854 goto decode_success;
24856 /* VCVTSI2SS r/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W0 2A /r */
24857 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
24858 UChar modrm = getUChar(delta);
24859 UInt rV = getVexNvvvv(pfx);
24860 UInt rD = gregOfRexRM(pfx, modrm);
24861 IRTemp arg32 = newTemp(Ity_I32);
24862 if (epartIsReg(modrm)) {
24863 UInt rS = eregOfRexRM(pfx,modrm);
24864 assign( arg32, getIReg32(rS) );
24865 delta += 1;
24866 DIP("vcvtsi2ssl %s,%s,%s\n",
24867 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
24868 } else {
24869 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24870 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
24871 delta += alen;
24872 DIP("vcvtsi2ssl %s,%s,%s\n",
24873 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24875 putXMMRegLane32F( rD, 0,
24876 binop(Iop_F64toF32,
24877 mkexpr(rmode),
24878 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
24879 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
24880 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24881 putYMMRegLane128( rD, 1, mkV128(0) );
24882 *uses_vvvv = True;
24883 goto decode_success;
24885 break;
24888 case 0x2B:
24889 /* VMOVNTPD xmm1, m128 = VEX.128.66.0F.WIG 2B /r */
24890 /* VMOVNTPS xmm1, m128 = VEX.128.0F.WIG 2B /r */
24891 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24892 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24893 UChar modrm = getUChar(delta);
24894 UInt rS = gregOfRexRM(pfx, modrm);
24895 IRTemp tS = newTemp(Ity_V128);
24896 assign(tS, getXMMReg(rS));
24897 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
24898 delta += alen;
24899 gen_SEGV_if_not_16_aligned(addr);
24900 storeLE(mkexpr(addr), mkexpr(tS));
24901 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
24902 nameXMMReg(rS), dis_buf);
24903 goto decode_success;
24905 /* VMOVNTPD ymm1, m256 = VEX.256.66.0F.WIG 2B /r */
24906 /* VMOVNTPS ymm1, m256 = VEX.256.0F.WIG 2B /r */
24907 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24908 && 1==getVexL(pfx)/*256*/ && !epartIsReg(getUChar(delta))) {
24909 UChar modrm = getUChar(delta);
24910 UInt rS = gregOfRexRM(pfx, modrm);
24911 IRTemp tS = newTemp(Ity_V256);
24912 assign(tS, getYMMReg(rS));
24913 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
24914 delta += alen;
24915 gen_SEGV_if_not_32_aligned(addr);
24916 storeLE(mkexpr(addr), mkexpr(tS));
24917 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
24918 nameYMMReg(rS), dis_buf);
24919 goto decode_success;
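/* Both VMOVNTPS/VMOVNTPD forms above are translated as ordinary aligned
   stores; the non-temporal (cache-bypass) hint has no observable effect in
   IR and is simply dropped. */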
24921 break;
24923 case 0x2C:
24924 /* VCVTTSD2SI xmm1/m64, r32 = VEX.LIG.F2.0F.W0 2C /r */
24925 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
24926 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
24927 goto decode_success;
24929 /* VCVTTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2C /r */
24930 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
24931 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
24932 goto decode_success;
24934 /* VCVTTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2C /r */
24935 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
24936 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
24937 goto decode_success;
24939 /* VCVTTSS2SI xmm1/m32, r64 = VEX.LIG.F3.0F.W1 2C /r */
24940 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
24941 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
24942 goto decode_success;
24944 break;
24946 case 0x2D:
24947 /* VCVTSD2SI xmm1/m64, r32 = VEX.LIG.F2.0F.W0 2D /r */
24948 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
24949 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
24950 goto decode_success;
24952 /* VCVTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2D /r */
24953 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
24954 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
24955 goto decode_success;
24957 /* VCVTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2D /r */
24958 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
24959 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
24960 goto decode_success;
24962 /* VCVTSS2SI xmm1/m32, r64 = VEX.LIG.F3.0F.W1 2D /r */
24963 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
24964 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
24965 goto decode_success;
24967 break;
24969 case 0x2E:
24970 case 0x2F:
24971 /* VUCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2E /r */
24972 /* VCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2F /r */
24973 if (have66noF2noF3(pfx)) {
24974 delta = dis_COMISD( vbi, pfx, delta, True/*isAvx*/, opc );
24975 goto decode_success;
24977 /* VUCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2E /r */
24978 /* VCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2F /r */
24979 if (haveNo66noF2noF3(pfx)) {
24980 delta = dis_COMISS( vbi, pfx, delta, True/*isAvx*/, opc );
24981 goto decode_success;
24983 break;
24985 case 0x50:
24986 /* VMOVMSKPD xmm2, r32 = VEX.128.66.0F.WIG 50 /r */
24987 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24988 delta = dis_MOVMSKPD_128( vbi, pfx, delta, True/*isAvx*/ );
24989 goto decode_success;
24991 /* VMOVMSKPD ymm2, r32 = VEX.256.66.0F.WIG 50 /r */
24992 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24993 delta = dis_MOVMSKPD_256( vbi, pfx, delta );
24994 goto decode_success;
24996 /* VMOVMSKPS xmm2, r32 = VEX.128.0F.WIG 50 /r */
24997 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24998 delta = dis_MOVMSKPS_128( vbi, pfx, delta, True/*isAvx*/ );
24999 goto decode_success;
25001 /* VMOVMSKPS ymm2, r32 = VEX.256.0F.WIG 50 /r */
25002 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25003 delta = dis_MOVMSKPS_256( vbi, pfx, delta );
25004 goto decode_success;
25006 break;
25008 case 0x51:
25009 /* VSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 51 /r */
25010 if (haveF3no66noF2(pfx)) {
25011 delta = dis_AVX128_E_V_to_G_lo32_unary(
25012 uses_vvvv, vbi, pfx, delta, "vsqrtss", Iop_Sqrt32F0x4 );
25013 goto decode_success;
25015 /* VSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 51 /r */
25016 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25017 delta = dis_AVX128_E_to_G_unary_all(
25018 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx4 );
25019 goto decode_success;
25021 /* VSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 51 /r */
25022 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25023 delta = dis_AVX256_E_to_G_unary_all(
25024 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx8 );
25025 goto decode_success;
25027 /* VSQRTSD xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F2.0F.WIG 51 /r */
25028 if (haveF2no66noF3(pfx)) {
25029 delta = dis_AVX128_E_V_to_G_lo64_unary(
25030 uses_vvvv, vbi, pfx, delta, "vsqrtsd", Iop_Sqrt64F0x2 );
25031 goto decode_success;
25033 /* VSQRTPD xmm2/m128(E), xmm1(G) = VEX.NDS.128.66.0F.WIG 51 /r */
25034 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25035 delta = dis_AVX128_E_to_G_unary_all(
25036 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx2 );
25037 goto decode_success;
25039 /* VSQRTPD ymm2/m256(E), ymm1(G) = VEX.NDS.256.66.0F.WIG 51 /r */
25040 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25041 delta = dis_AVX256_E_to_G_unary_all(
25042 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx4 );
25043 goto decode_success;
25045 break;
25047 case 0x52:
25048 /* VRSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 52 /r */
25049 if (haveF3no66noF2(pfx)) {
25050 delta = dis_AVX128_E_V_to_G_lo32_unary(
25051 uses_vvvv, vbi, pfx, delta, "vrsqrtss",
25052 Iop_RSqrtEst32F0x4 );
25053 goto decode_success;
25055 /* VRSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 52 /r */
25056 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25057 delta = dis_AVX128_E_to_G_unary_all(
25058 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx4 );
25059 goto decode_success;
25061 /* VRSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 52 /r */
25062 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25063 delta = dis_AVX256_E_to_G_unary_all(
25064 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx8 );
25065 goto decode_success;
25067 break;
25069 case 0x53:
25070 /* VRCPSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 53 /r */
25071 if (haveF3no66noF2(pfx)) {
25072 delta = dis_AVX128_E_V_to_G_lo32_unary(
25073 uses_vvvv, vbi, pfx, delta, "vrcpss", Iop_RecipEst32F0x4 );
25074 goto decode_success;
25076 /* VRCPPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 53 /r */
25077 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25078 delta = dis_AVX128_E_to_G_unary_all(
25079 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx4 );
25080 goto decode_success;
25082 /* VRCPPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 53 /r */
25083 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25084 delta = dis_AVX256_E_to_G_unary_all(
25085 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx8 );
25086 goto decode_success;
25088 break;
25090 case 0x54:
25091 /* VANDPD r/m, rV, r ::: r = rV & r/m */
25092 /* VANDPD = VEX.NDS.128.66.0F.WIG 54 /r */
25093 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25094 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25095 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128 );
25096 goto decode_success;
25098 /* VANDPD r/m, rV, r ::: r = rV & r/m */
25099 /* VANDPD = VEX.NDS.256.66.0F.WIG 54 /r */
25100 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25101 delta = dis_AVX256_E_V_to_G(
25102 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256 );
25103 goto decode_success;
25105 /* VANDPS = VEX.NDS.128.0F.WIG 54 /r */
25106 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25107 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25108 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128 );
25109 goto decode_success;
25111 /* VANDPS = VEX.NDS.256.0F.WIG 54 /r */
25112 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25113 delta = dis_AVX256_E_V_to_G(
25114 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256 );
25115 goto decode_success;
25117 break;
25119 case 0x55:
25120 /* VANDNPD r/m, rV, r ::: r = (not rV) & r/m */
25121 /* VANDNPD = VEX.NDS.128.66.0F.WIG 55 /r */
25122 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25123 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25124 uses_vvvv, vbi, pfx, delta, "vandnpd", Iop_AndV128,
25125 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25126 goto decode_success;
25128 /* VANDNPD = VEX.NDS.256.66.0F.WIG 55 /r */
25129 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25130 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
25131 uses_vvvv, vbi, pfx, delta, "vandnpd", Iop_AndV256,
25132 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25133 goto decode_success;
25135 /* VANDNPS = VEX.NDS.128.0F.WIG 55 /r */
25136 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25137 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25138 uses_vvvv, vbi, pfx, delta, "vandnps", Iop_AndV128,
25139 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25140 goto decode_success;
25142 /* VANDNPS = VEX.NDS.256.0F.WIG 55 /r */
25143 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25144 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
25145 uses_vvvv, vbi, pfx, delta, "vandnps", Iop_AndV256,
25146 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25147 goto decode_success;
25149 break;
25151 case 0x56:
25152 /* VORPD r/m, rV, r ::: r = rV | r/m */
25153 /* VORPD = VEX.NDS.128.66.0F.WIG 56 /r */
25154 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25155 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25156 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV128 );
25157 goto decode_success;
25159 /* VORPD r/m, rV, r ::: r = rV | r/m */
25160 /* VORPD = VEX.NDS.256.66.0F.WIG 56 /r */
25161 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25162 delta = dis_AVX256_E_V_to_G(
25163 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV256 );
25164 goto decode_success;
25166 /* VORPS r/m, rV, r ::: r = rV | r/m */
25167 /* VORPS = VEX.NDS.128.0F.WIG 56 /r */
25168 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25169 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25170 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV128 );
25171 goto decode_success;
25173 /* VORPS r/m, rV, r ::: r = rV | r/m */
25174 /* VORPS = VEX.NDS.256.0F.WIG 56 /r */
25175 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25176 delta = dis_AVX256_E_V_to_G(
25177 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV256 );
25178 goto decode_success;
25180 break;
25182 case 0x57:
25183 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
25184 /* VXORPD = VEX.NDS.128.66.0F.WIG 57 /r */
25185 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25186 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25187 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV128 );
25188 goto decode_success;
25190 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
25191 /* VXORPD = VEX.NDS.256.66.0F.WIG 57 /r */
25192 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25193 delta = dis_AVX256_E_V_to_G(
25194 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV256 );
25195 goto decode_success;
25197 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
25198 /* VXORPS = VEX.NDS.128.0F.WIG 57 /r */
25199 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25200 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25201 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV128 );
25202 goto decode_success;
25204 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
25205 /* VXORPS = VEX.NDS.256.0F.WIG 57 /r */
25206 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25207 delta = dis_AVX256_E_V_to_G(
25208 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV256 );
25209 goto decode_success;
25211 break;
25213 case 0x58:
25214 /* VADDSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 58 /r */
25215 if (haveF2no66noF3(pfx)) {
25216 delta = dis_AVX128_E_V_to_G_lo64(
25217 uses_vvvv, vbi, pfx, delta, "vaddsd", Iop_Add64F0x2 );
25218 goto decode_success;
25220 /* VADDSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 58 /r */
25221 if (haveF3no66noF2(pfx)) {
25222 delta = dis_AVX128_E_V_to_G_lo32(
25223 uses_vvvv, vbi, pfx, delta, "vaddss", Iop_Add32F0x4 );
25224 goto decode_success;
25226 /* VADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 58 /r */
25227 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25228 delta = dis_AVX128_E_V_to_G(
25229 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx4 );
25230 goto decode_success;
25232 /* VADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 58 /r */
25233 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25234 delta = dis_AVX256_E_V_to_G(
25235 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx8 );
25236 goto decode_success;
25238 /* VADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 58 /r */
25239 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25240 delta = dis_AVX128_E_V_to_G(
25241 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx2 );
25242 goto decode_success;
25244 /* VADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 58 /r */
25245 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25246 delta = dis_AVX256_E_V_to_G(
25247 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx4 );
25248 goto decode_success;
25250 break;
25252 case 0x59:
25253 /* VMULSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 59 /r */
25254 if (haveF2no66noF3(pfx)) {
25255 delta = dis_AVX128_E_V_to_G_lo64(
25256 uses_vvvv, vbi, pfx, delta, "vmulsd", Iop_Mul64F0x2 );
25257 goto decode_success;
25259 /* VMULSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 59 /r */
25260 if (haveF3no66noF2(pfx)) {
25261 delta = dis_AVX128_E_V_to_G_lo32(
25262 uses_vvvv, vbi, pfx, delta, "vmulss", Iop_Mul32F0x4 );
25263 goto decode_success;
25265 /* VMULPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 59 /r */
25266 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25267 delta = dis_AVX128_E_V_to_G(
25268 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx4 );
25269 goto decode_success;
25271 /* VMULPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 59 /r */
25272 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25273 delta = dis_AVX256_E_V_to_G(
25274 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx8 );
25275 goto decode_success;
25277 /* VMULPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 59 /r */
25278 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25279 delta = dis_AVX128_E_V_to_G(
25280 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx2 );
25281 goto decode_success;
25283 /* VMULPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 59 /r */
25284 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25285 delta = dis_AVX256_E_V_to_G(
25286 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx4 );
25287 goto decode_success;
25289 break;
25291 case 0x5A:
25292 /* VCVTPS2PD xmm2/m64, xmm1 = VEX.128.0F.WIG 5A /r */
25293 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25294 delta = dis_CVTPS2PD_128( vbi, pfx, delta, True/*isAvx*/ );
25295 goto decode_success;
25297 /* VCVTPS2PD xmm2/m128, ymm1 = VEX.256.0F.WIG 5A /r */
25298 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25299 delta = dis_CVTPS2PD_256( vbi, pfx, delta );
25300 goto decode_success;
25302 /* VCVTPD2PS xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5A /r */
25303 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25304 delta = dis_CVTPD2PS_128( vbi, pfx, delta, True/*isAvx*/ );
25305 goto decode_success;
25307 /* VCVTPD2PS ymm2/m256, xmm1 = VEX.256.66.0F.WIG 5A /r */
25308 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25309 delta = dis_CVTPD2PS_256( vbi, pfx, delta );
25310 goto decode_success;
25312 /* VCVTSD2SS xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5A /r */
25313 if (haveF2no66noF3(pfx)) {
25314 UChar modrm = getUChar(delta);
25315 UInt rV = getVexNvvvv(pfx);
25316 UInt rD = gregOfRexRM(pfx, modrm);
25317 IRTemp f64lo = newTemp(Ity_F64);
25318 IRTemp rmode = newTemp(Ity_I32);
25319 assign( rmode, get_sse_roundingmode() );
25320 if (epartIsReg(modrm)) {
25321 UInt rS = eregOfRexRM(pfx,modrm);
25322 assign(f64lo, getXMMRegLane64F(rS, 0));
25323 delta += 1;
25324 DIP("vcvtsd2ss %s,%s,%s\n",
25325 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
25326 } else {
25327 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25328 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)) );
25329 delta += alen;
25330 DIP("vcvtsd2ss %s,%s,%s\n",
25331 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
25333 putXMMRegLane32F( rD, 0,
25334 binop( Iop_F64toF32, mkexpr(rmode),
25335 mkexpr(f64lo)) );
25336 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
25337 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
25338 putYMMRegLane128( rD, 1, mkV128(0) );
25339 *uses_vvvv = True;
25340 goto decode_success;
25342 /* VCVTSS2SD xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5A /r */
25343 if (haveF3no66noF2(pfx)) {
25344 UChar modrm = getUChar(delta);
25345 UInt rV = getVexNvvvv(pfx);
25346 UInt rD = gregOfRexRM(pfx, modrm);
25347 IRTemp f32lo = newTemp(Ity_F32);
25348 if (epartIsReg(modrm)) {
25349 UInt rS = eregOfRexRM(pfx,modrm);
25350 assign(f32lo, getXMMRegLane32F(rS, 0));
25351 delta += 1;
25352 DIP("vcvtss2sd %s,%s,%s\n",
25353 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
25354 } else {
25355 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25356 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)) );
25357 delta += alen;
25358 DIP("vcvtss2sd %s,%s,%s\n",
25359 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
25361 putXMMRegLane64F( rD, 0,
25362 unop( Iop_F32toF64, mkexpr(f32lo)) );
25363 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
25364 putYMMRegLane128( rD, 1, mkV128(0) );
25365 *uses_vvvv = True;
25366 goto decode_success;
25368 break;
25370 case 0x5B:
25371 /* VCVTPS2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5B /r */
25372 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25373 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
25374 True/*isAvx*/, False/*!r2zero*/ );
25375 goto decode_success;
25377 /* VCVTPS2DQ ymm2/m256, ymm1 = VEX.256.66.0F.WIG 5B /r */
25378 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25379 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
25380 False/*!r2zero*/ );
25381 goto decode_success;
25383 /* VCVTTPS2DQ xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 5B /r */
25384 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
25385 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
25386 True/*isAvx*/, True/*r2zero*/ );
25387 goto decode_success;
25389 /* VCVTTPS2DQ ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 5B /r */
25390 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
25391 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
25392 True/*r2zero*/ );
25393 goto decode_success;
25395 /* VCVTDQ2PS xmm2/m128, xmm1 = VEX.128.0F.WIG 5B /r */
25396 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25397 delta = dis_CVTDQ2PS_128 ( vbi, pfx, delta, True/*isAvx*/ );
25398 goto decode_success;
25400 /* VCVTDQ2PS ymm2/m256, ymm1 = VEX.256.0F.WIG 5B /r */
25401 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25402 delta = dis_CVTDQ2PS_256 ( vbi, pfx, delta );
25403 goto decode_success;
25405 break;
25407 case 0x5C:
25408 /* VSUBSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5C /r */
25409 if (haveF2no66noF3(pfx)) {
25410 delta = dis_AVX128_E_V_to_G_lo64(
25411 uses_vvvv, vbi, pfx, delta, "vsubsd", Iop_Sub64F0x2 );
25412 goto decode_success;
25414 /* VSUBSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5C /r */
25415 if (haveF3no66noF2(pfx)) {
25416 delta = dis_AVX128_E_V_to_G_lo32(
25417 uses_vvvv, vbi, pfx, delta, "vsubss", Iop_Sub32F0x4 );
25418 goto decode_success;
25420 /* VSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5C /r */
25421 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25422 delta = dis_AVX128_E_V_to_G(
25423 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx4 );
25424 goto decode_success;
25426 /* VSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5C /r */
25427 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25428 delta = dis_AVX256_E_V_to_G(
25429 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx8 );
25430 goto decode_success;
25432 /* VSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5C /r */
25433 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25434 delta = dis_AVX128_E_V_to_G(
25435 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx2 );
25436 goto decode_success;
25438 /* VSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5C /r */
25439 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25440 delta = dis_AVX256_E_V_to_G(
25441 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx4 );
25442 goto decode_success;
25444 break;
25446 case 0x5D:
25447 /* VMINSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5D /r */
25448 if (haveF2no66noF3(pfx)) {
25449 delta = dis_AVX128_E_V_to_G_lo64(
25450 uses_vvvv, vbi, pfx, delta, "vminsd", Iop_Min64F0x2 );
25451 goto decode_success;
25453 /* VMINSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5D /r */
25454 if (haveF3no66noF2(pfx)) {
25455 delta = dis_AVX128_E_V_to_G_lo32(
25456 uses_vvvv, vbi, pfx, delta, "vminss", Iop_Min32F0x4 );
25457 goto decode_success;
25459 /* VMINPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5D /r */
25460 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25461 delta = dis_AVX128_E_V_to_G(
25462 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx4 );
25463 goto decode_success;
25465 /* VMINPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5D /r */
25466 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25467 delta = dis_AVX256_E_V_to_G(
25468 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx8 );
25469 goto decode_success;
25471 /* VMINPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5D /r */
25472 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25473 delta = dis_AVX128_E_V_to_G(
25474 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx2 );
25475 goto decode_success;
25477 /* VMINPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5D /r */
25478 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25479 delta = dis_AVX256_E_V_to_G(
25480 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx4 );
25481 goto decode_success;
25483 break;
25485 case 0x5E:
25486 /* VDIVSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5E /r */
25487 if (haveF2no66noF3(pfx)) {
25488 delta = dis_AVX128_E_V_to_G_lo64(
25489 uses_vvvv, vbi, pfx, delta, "vdivsd", Iop_Div64F0x2 );
25490 goto decode_success;
25492 /* VDIVSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5E /r */
25493 if (haveF3no66noF2(pfx)) {
25494 delta = dis_AVX128_E_V_to_G_lo32(
25495 uses_vvvv, vbi, pfx, delta, "vdivss", Iop_Div32F0x4 );
25496 goto decode_success;
25498 /* VDIVPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5E /r */
25499 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25500 delta = dis_AVX128_E_V_to_G(
25501 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx4 );
25502 goto decode_success;
25504 /* VDIVPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5E /r */
25505 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25506 delta = dis_AVX256_E_V_to_G(
25507 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx8 );
25508 goto decode_success;
25510 /* VDIVPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5E /r */
25511 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25512 delta = dis_AVX128_E_V_to_G(
25513 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx2 );
25514 goto decode_success;
25516 /* VDIVPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5E /r */
25517 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25518 delta = dis_AVX256_E_V_to_G(
25519 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx4 );
25520 goto decode_success;
25522 break;
25524 case 0x5F:
25525 /* VMAXSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5F /r */
25526 if (haveF2no66noF3(pfx)) {
25527 delta = dis_AVX128_E_V_to_G_lo64(
25528 uses_vvvv, vbi, pfx, delta, "vmaxsd", Iop_Max64F0x2 );
25529 goto decode_success;
25531 /* VMAXSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5F /r */
25532 if (haveF3no66noF2(pfx)) {
25533 delta = dis_AVX128_E_V_to_G_lo32(
25534 uses_vvvv, vbi, pfx, delta, "vmaxss", Iop_Max32F0x4 );
25535 goto decode_success;
25537 /* VMAXPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5F /r */
25538 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25539 delta = dis_AVX128_E_V_to_G(
25540 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx4 );
25541 goto decode_success;
25543 /* VMAXPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5F /r */
25544 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25545 delta = dis_AVX256_E_V_to_G(
25546 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx8 );
25547 goto decode_success;
25549 /* VMAXPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5F /r */
25550 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25551 delta = dis_AVX128_E_V_to_G(
25552 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx2 );
25553 goto decode_success;
25555 /* VMAXPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5F /r */
25556 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25557 delta = dis_AVX256_E_V_to_G(
25558 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx4 );
25559 goto decode_success;
25561 break;
25563 case 0x60:
25564 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
25565 /* VPUNPCKLBW = VEX.NDS.128.66.0F.WIG 60 /r */
25566 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25567 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25568 uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
25569 Iop_InterleaveLO8x16, NULL,
25570 False/*!invertLeftArg*/, True/*swapArgs*/ );
25571 goto decode_success;
25573 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
25574 /* VPUNPCKLBW = VEX.NDS.256.66.0F.WIG 60 /r */
25575 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25576 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25577 uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
25578 math_VPUNPCKLBW_YMM );
25579 goto decode_success;
25581 break;
25583 case 0x61:
25584 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
25585 /* VPUNPCKLWD = VEX.NDS.128.66.0F.WIG 61 /r */
25586 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25587 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25588 uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
25589 Iop_InterleaveLO16x8, NULL,
25590 False/*!invertLeftArg*/, True/*swapArgs*/ );
25591 goto decode_success;
25593 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
25594 /* VPUNPCKLWD = VEX.NDS.256.66.0F.WIG 61 /r */
25595 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25596 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25597 uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
25598 math_VPUNPCKLWD_YMM );
25599 goto decode_success;
25601 break;
25603 case 0x62:
25604 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
25605 /* VPUNPCKLDQ = VEX.NDS.128.66.0F.WIG 62 /r */
25606 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25607 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25608 uses_vvvv, vbi, pfx, delta, "vpunpckldq",
25609 Iop_InterleaveLO32x4, NULL,
25610 False/*!invertLeftArg*/, True/*swapArgs*/ );
25611 goto decode_success;
25613 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
25614 /* VPUNPCKLDQ = VEX.NDS.256.66.0F.WIG 62 /r */
25615 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25616 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25617 uses_vvvv, vbi, pfx, delta, "vpunpckldq",
25618 math_VPUNPCKLDQ_YMM );
25619 goto decode_success;
25621 break;
25623 case 0x63:
25624 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
25625 /* VPACKSSWB = VEX.NDS.128.66.0F.WIG 63 /r */
25626 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25627 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25628 uses_vvvv, vbi, pfx, delta, "vpacksswb",
25629 Iop_QNarrowBin16Sto8Sx16, NULL,
25630 False/*!invertLeftArg*/, True/*swapArgs*/ );
25631 goto decode_success;
25633 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
25634 /* VPACKSSWB = VEX.NDS.256.66.0F.WIG 63 /r */
25635 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25636 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25637 uses_vvvv, vbi, pfx, delta, "vpacksswb",
25638 math_VPACKSSWB_YMM );
25639 goto decode_success;
25641 break;
25643 case 0x64:
25644 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
25645 /* VPCMPGTB = VEX.NDS.128.66.0F.WIG 64 /r */
25646 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25647 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25648 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx16 );
25649 goto decode_success;
25651 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
25652 /* VPCMPGTB = VEX.NDS.256.66.0F.WIG 64 /r */
25653 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25654 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25655 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx32 );
25656 goto decode_success;
25658 break;
25660 case 0x65:
25661 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
25662 /* VPCMPGTW = VEX.NDS.128.66.0F.WIG 65 /r */
25663 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25664 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25665 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx8 );
25666 goto decode_success;
25668 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
25669 /* VPCMPGTW = VEX.NDS.256.66.0F.WIG 65 /r */
25670 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25671 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25672 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx16 );
25673 goto decode_success;
25675 break;
25677 case 0x66:
25678 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
25679 /* VPCMPGTD = VEX.NDS.128.66.0F.WIG 66 /r */
25680 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25681 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25682 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx4 );
25683 goto decode_success;
25685 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
25686 /* VPCMPGTD = VEX.NDS.256.66.0F.WIG 66 /r */
25687 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25688 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25689 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx8 );
25690 goto decode_success;
25692 break;
25694 case 0x67:
25695 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
25696 /* VPACKUSWB = VEX.NDS.128.66.0F.WIG 67 /r */
25697 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25698 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25699 uses_vvvv, vbi, pfx, delta, "vpackuswb",
25700 Iop_QNarrowBin16Sto8Ux16, NULL,
25701 False/*!invertLeftArg*/, True/*swapArgs*/ );
25702 goto decode_success;
25704 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
25705 /* VPACKUSWB = VEX.NDS.256.66.0F.WIG 67 /r */
25706 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25707 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25708 uses_vvvv, vbi, pfx, delta, "vpackuswb",
25709 math_VPACKUSWB_YMM );
25710 goto decode_success;
25712 break;
25714 case 0x68:
25715 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
25716          /* VPUNPCKHBW = VEX.NDS.128.66.0F.WIG 68 /r */
25717 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25718 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25719 uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
25720 Iop_InterleaveHI8x16, NULL,
25721 False/*!invertLeftArg*/, True/*swapArgs*/ );
25722 goto decode_success;
25724 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
25725          /* VPUNPCKHBW = VEX.NDS.256.66.0F.WIG 68 /r */
25726 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25727 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25728 uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
25729 math_VPUNPCKHBW_YMM );
25730 goto decode_success;
25732 break;
25734 case 0x69:
25735 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
25736          /* VPUNPCKHWD = VEX.NDS.128.66.0F.WIG 69 /r */
25737 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25738 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25739 uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
25740 Iop_InterleaveHI16x8, NULL,
25741 False/*!invertLeftArg*/, True/*swapArgs*/ );
25742 goto decode_success;
25744 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
25745          /* VPUNPCKHWD = VEX.NDS.256.66.0F.WIG 69 /r */
25746 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25747 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25748 uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
25749 math_VPUNPCKHWD_YMM );
25750 goto decode_success;
25752 break;
25754 case 0x6A:
25755 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
25756 /* VPUNPCKHDQ = VEX.NDS.128.66.0F.WIG 6A /r */
25757 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25758 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25759 uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
25760 Iop_InterleaveHI32x4, NULL,
25761 False/*!invertLeftArg*/, True/*swapArgs*/ );
25762 goto decode_success;
25764 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
25765 /* VPUNPCKHDQ = VEX.NDS.256.66.0F.WIG 6A /r */
25766 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25767 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25768 uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
25769 math_VPUNPCKHDQ_YMM );
25770 goto decode_success;
25772 break;
25774 case 0x6B:
25775 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25776 /* VPACKSSDW = VEX.NDS.128.66.0F.WIG 6B /r */
25777 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25778 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25779 uses_vvvv, vbi, pfx, delta, "vpackssdw",
25780 Iop_QNarrowBin32Sto16Sx8, NULL,
25781 False/*!invertLeftArg*/, True/*swapArgs*/ );
25782 goto decode_success;
25784 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25785 /* VPACKSSDW = VEX.NDS.256.66.0F.WIG 6B /r */
25786 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25787 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25788 uses_vvvv, vbi, pfx, delta, "vpackssdw",
25789 math_VPACKSSDW_YMM );
25790 goto decode_success;
25792 break;
25794 case 0x6C:
25795 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
25796          /* VPUNPCKLQDQ = VEX.NDS.128.66.0F.WIG 6C /r */
25797 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25798 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25799 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq",
25800 Iop_InterleaveLO64x2, NULL,
25801 False/*!invertLeftArg*/, True/*swapArgs*/ );
25802 goto decode_success;
25804 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
25805          /* VPUNPCKLQDQ = VEX.NDS.256.66.0F.WIG 6C /r */
25806 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25807 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25808 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq",
25809 math_VPUNPCKLQDQ_YMM );
25810 goto decode_success;
25812 break;
25814 case 0x6D:
25815 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
25816          /* VPUNPCKHQDQ = VEX.NDS.128.66.0F.WIG 6D /r */
25817 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25818 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25819 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq",
25820 Iop_InterleaveHI64x2, NULL,
25821 False/*!invertLeftArg*/, True/*swapArgs*/ );
25822 goto decode_success;
25824 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
25825          /* VPUNPCKHQDQ = VEX.NDS.256.66.0F.WIG 6D /r */
25826 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25827 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25828 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq",
25829 math_VPUNPCKHQDQ_YMM );
25830 goto decode_success;
25832 break;
25834 case 0x6E:
25835 /* VMOVD r32/m32, xmm1 = VEX.128.66.0F.W0 6E */
25836 if (have66noF2noF3(pfx)
25837 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
25838 vassert(sz == 2); /* even tho we are transferring 4, not 2. */
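         /* Here sz reflects only the 66 prefix (operand size 2); the width
            actually transferred is selected by VEX.W -- 32 bits for the W0
            form here, 64 bits for the W1 form handled below. */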
25839 UChar modrm = getUChar(delta);
25840 if (epartIsReg(modrm)) {
25841 delta += 1;
25842 putYMMRegLoAndZU(
25843 gregOfRexRM(pfx,modrm),
25844 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
25846 DIP("vmovd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
25847 nameXMMReg(gregOfRexRM(pfx,modrm)));
25848 } else {
25849 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25850 delta += alen;
25851 putYMMRegLoAndZU(
25852 gregOfRexRM(pfx,modrm),
25853 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)))
25855 DIP("vmovd %s, %s\n", dis_buf,
25856 nameXMMReg(gregOfRexRM(pfx,modrm)));
25858 goto decode_success;
25860 /* VMOVQ r64/m64, xmm1 = VEX.128.66.0F.W1 6E */
25861 if (have66noF2noF3(pfx)
25862 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
25863 vassert(sz == 2); /* even tho we are transferring 8, not 2. */
25864 UChar modrm = getUChar(delta);
25865 if (epartIsReg(modrm)) {
25866 delta += 1;
25867 putYMMRegLoAndZU(
25868 gregOfRexRM(pfx,modrm),
25869 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
25871 DIP("vmovq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
25872 nameXMMReg(gregOfRexRM(pfx,modrm)));
25873 } else {
25874 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25875 delta += alen;
25876 putYMMRegLoAndZU(
25877 gregOfRexRM(pfx,modrm),
25878 unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)))
25880 DIP("vmovq %s, %s\n", dis_buf,
25881 nameXMMReg(gregOfRexRM(pfx,modrm)));
25883 goto decode_success;
25885 break;
25887 case 0x6F:
25888 /* VMOVDQA ymm2/m256, ymm1 = VEX.256.66.0F.WIG 6F */
25889 /* VMOVDQU ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 6F */
25890 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
25891 && 1==getVexL(pfx)/*256*/) {
25892 UChar modrm = getUChar(delta);
25893 UInt rD = gregOfRexRM(pfx, modrm);
25894 IRTemp tD = newTemp(Ity_V256);
25895 Bool isA = have66noF2noF3(pfx);
25896 HChar ch = isA ? 'a' : 'u';
25897 if (epartIsReg(modrm)) {
25898 UInt rS = eregOfRexRM(pfx, modrm);
25899 delta += 1;
25900 assign(tD, getYMMReg(rS));
25901 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
25902 } else {
25903 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25904 delta += alen;
25905 if (isA)
25906 gen_SEGV_if_not_32_aligned(addr);
25907 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
25908 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameYMMReg(rD));
25910 putYMMReg(rD, mkexpr(tD));
25911 goto decode_success;
25913 /* VMOVDQA xmm2/m128, xmm1 = VEX.128.66.0F.WIG 6F */
25914 /* VMOVDQU xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 6F */
25915 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
25916 && 0==getVexL(pfx)/*128*/) {
25917 UChar modrm = getUChar(delta);
25918 UInt rD = gregOfRexRM(pfx, modrm);
25919 IRTemp tD = newTemp(Ity_V128);
25920 Bool isA = have66noF2noF3(pfx);
25921 HChar ch = isA ? 'a' : 'u';
25922 if (epartIsReg(modrm)) {
25923 UInt rS = eregOfRexRM(pfx, modrm);
25924 delta += 1;
25925 assign(tD, getXMMReg(rS));
25926 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
25927 } else {
25928 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25929 delta += alen;
25930 if (isA)
25931 gen_SEGV_if_not_16_aligned(addr);
25932 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
25933 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameXMMReg(rD));
25935 putYMMRegLoAndZU(rD, mkexpr(tD));
25936 goto decode_success;
25938 break;
25940 case 0x70:
25941 /* VPSHUFD imm8, xmm2/m128, xmm1 = VEX.128.66.0F.WIG 70 /r ib */
25942 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25943 delta = dis_PSHUFD_32x4( vbi, pfx, delta, True/*writesYmm*/);
25944 goto decode_success;
25946 /* VPSHUFD imm8, ymm2/m256, ymm1 = VEX.256.66.0F.WIG 70 /r ib */
25947 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25948 delta = dis_PSHUFD_32x8( vbi, pfx, delta);
25949 goto decode_success;
25951 /* VPSHUFLW imm8, xmm2/m128, xmm1 = VEX.128.F2.0F.WIG 70 /r ib */
25952 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25953 delta = dis_PSHUFxW_128( vbi, pfx, delta,
25954 True/*isAvx*/, False/*!xIsH*/ );
25955 goto decode_success;
25957 /* VPSHUFLW imm8, ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 70 /r ib */
25958 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25959 delta = dis_PSHUFxW_256( vbi, pfx, delta, False/*!xIsH*/ );
25960 goto decode_success;
25962 /* VPSHUFHW imm8, xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 70 /r ib */
25963 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
25964 delta = dis_PSHUFxW_128( vbi, pfx, delta,
25965 True/*isAvx*/, True/*xIsH*/ );
25966 goto decode_success;
25968 /* VPSHUFHW imm8, ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 70 /r ib */
25969 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
25970 delta = dis_PSHUFxW_256( vbi, pfx, delta, True/*xIsH*/ );
25971 goto decode_success;
25973 break;
25975 case 0x71:
25976 /* VPSRLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /2 ib */
25977 /* VPSRAW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /4 ib */
25978 /* VPSLLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /6 ib */
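         /* These are the NDD-encoded shift-by-immediate forms: the ModRM reg
            field (/2, /4, /6) selects the operation, the rm field (which must
            be a register) names the source, and VEX.vvvv names the
            destination -- hence dis_AVX128_shiftE_to_V_imm and the explicit
            *uses_vvvv = True below. */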
25979 if (have66noF2noF3(pfx)
25980 && 0==getVexL(pfx)/*128*/
25981 && epartIsReg(getUChar(delta))) {
25982 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
25983 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25984 "vpsrlw", Iop_ShrN16x8 );
25985 *uses_vvvv = True;
25986 goto decode_success;
25988 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
25989 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25990 "vpsraw", Iop_SarN16x8 );
25991 *uses_vvvv = True;
25992 goto decode_success;
25994 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
25995 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25996 "vpsllw", Iop_ShlN16x8 );
25997 *uses_vvvv = True;
25998 goto decode_success;
26000 /* else fall through */
26002 /* VPSRLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /2 ib */
26003 /* VPSRAW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /4 ib */
26004 /* VPSLLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /6 ib */
26005 if (have66noF2noF3(pfx)
26006 && 1==getVexL(pfx)/*256*/
26007 && epartIsReg(getUChar(delta))) {
26008 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
26009 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26010 "vpsrlw", Iop_ShrN16x16 );
26011 *uses_vvvv = True;
26012 goto decode_success;
26014 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26015 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26016 "vpsraw", Iop_SarN16x16 );
26017 *uses_vvvv = True;
26018 goto decode_success;
26020 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26021 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26022 "vpsllw", Iop_ShlN16x16 );
26023 *uses_vvvv = True;
26024 goto decode_success;
26026 /* else fall through */
26028 break;
26030 case 0x72:
26031 /* VPSRLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /2 ib */
26032 /* VPSRAD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /4 ib */
26033 /* VPSLLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /6 ib */
26034 if (have66noF2noF3(pfx)
26035 && 0==getVexL(pfx)/*128*/
26036 && epartIsReg(getUChar(delta))) {
26037 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
26038 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26039 "vpsrld", Iop_ShrN32x4 );
26040 *uses_vvvv = True;
26041 goto decode_success;
26043 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26044 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26045 "vpsrad", Iop_SarN32x4 );
26046 *uses_vvvv = True;
26047 goto decode_success;
26049 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26050 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26051 "vpslld", Iop_ShlN32x4 );
26052 *uses_vvvv = True;
26053 goto decode_success;
26055 /* else fall through */
26057 /* VPSRLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /2 ib */
26058 /* VPSRAD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /4 ib */
26059 /* VPSLLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /6 ib */
26060 if (have66noF2noF3(pfx)
26061 && 1==getVexL(pfx)/*256*/
26062 && epartIsReg(getUChar(delta))) {
26063 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
26064 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26065 "vpsrld", Iop_ShrN32x8 );
26066 *uses_vvvv = True;
26067 goto decode_success;
26069 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26070 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26071 "vpsrad", Iop_SarN32x8 );
26072 *uses_vvvv = True;
26073 goto decode_success;
26075 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26076 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26077 "vpslld", Iop_ShlN32x8 );
26078 *uses_vvvv = True;
26079 goto decode_success;
26081 /* else fall through */
26083 break;
26085 case 0x73:
26086 /* VPSRLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /3 ib */
26087 /* VPSLLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /7 ib */
26088 /* VPSRLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /2 ib */
26089 /* VPSLLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /6 ib */
26090 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
26091 && epartIsReg(getUChar(delta))) {
26092 Int rS = eregOfRexRM(pfx,getUChar(delta));
26093 Int rD = getVexNvvvv(pfx);
26094 IRTemp vecS = newTemp(Ity_V128);
26095 if (gregLO3ofRM(getUChar(delta)) == 3) {
26096 Int imm = (Int)getUChar(delta+1);
26097 DIP("vpsrldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
26098 delta += 2;
26099 assign( vecS, getXMMReg(rS) );
26100 putYMMRegLoAndZU(rD, mkexpr(math_PSRLDQ( vecS, imm )));
26101 *uses_vvvv = True;
26102 goto decode_success;
26104 if (gregLO3ofRM(getUChar(delta)) == 7) {
26105 Int imm = (Int)getUChar(delta+1);
26106 DIP("vpslldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
26107 delta += 2;
26108 assign( vecS, getXMMReg(rS) );
26109 putYMMRegLoAndZU(rD, mkexpr(math_PSLLDQ( vecS, imm )));
26110 *uses_vvvv = True;
26111 goto decode_success;
26113 if (gregLO3ofRM(getUChar(delta)) == 2) {
26114 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26115 "vpsrlq", Iop_ShrN64x2 );
26116 *uses_vvvv = True;
26117 goto decode_success;
26119 if (gregLO3ofRM(getUChar(delta)) == 6) {
26120 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26121 "vpsllq", Iop_ShlN64x2 );
26122 *uses_vvvv = True;
26123 goto decode_success;
26125 /* else fall through */
26127 /* VPSRLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /3 ib */
26128 /* VPSLLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /7 ib */
26129 /* VPSRLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /2 ib */
26130 /* VPSLLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /6 ib */
26131 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
26132 && epartIsReg(getUChar(delta))) {
26133 Int rS = eregOfRexRM(pfx,getUChar(delta));
26134 Int rD = getVexNvvvv(pfx);
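            /* In the 256-bit forms, VPSRLDQ/VPSLLDQ shift bytes within each
               128-bit lane independently, so the two lanes are pulled apart,
               shifted separately and written back one at a time below. */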
26135 if (gregLO3ofRM(getUChar(delta)) == 3) {
26136 IRTemp vecS0 = newTemp(Ity_V128);
26137 IRTemp vecS1 = newTemp(Ity_V128);
26138 Int imm = (Int)getUChar(delta+1);
26139 DIP("vpsrldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD));
26140 delta += 2;
26141 assign( vecS0, getYMMRegLane128(rS, 0));
26142 assign( vecS1, getYMMRegLane128(rS, 1));
26143 putYMMRegLane128(rD, 0, mkexpr(math_PSRLDQ( vecS0, imm )));
26144 putYMMRegLane128(rD, 1, mkexpr(math_PSRLDQ( vecS1, imm )));
26145 *uses_vvvv = True;
26146 goto decode_success;
26148 if (gregLO3ofRM(getUChar(delta)) == 7) {
26149 IRTemp vecS0 = newTemp(Ity_V128);
26150 IRTemp vecS1 = newTemp(Ity_V128);
26151 Int imm = (Int)getUChar(delta+1);
26152 DIP("vpslldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD));
26153 delta += 2;
26154 assign( vecS0, getYMMRegLane128(rS, 0));
26155 assign( vecS1, getYMMRegLane128(rS, 1));
26156 putYMMRegLane128(rD, 0, mkexpr(math_PSLLDQ( vecS0, imm )));
26157 putYMMRegLane128(rD, 1, mkexpr(math_PSLLDQ( vecS1, imm )));
26158 *uses_vvvv = True;
26159 goto decode_success;
26161 if (gregLO3ofRM(getUChar(delta)) == 2) {
26162 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26163 "vpsrlq", Iop_ShrN64x4 );
26164 *uses_vvvv = True;
26165 goto decode_success;
26167 if (gregLO3ofRM(getUChar(delta)) == 6) {
26168 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26169 "vpsllq", Iop_ShlN64x4 );
26170 *uses_vvvv = True;
26171 goto decode_success;
26173 /* else fall through */
26175 break;
26177 case 0x74:
26178 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
26179 /* VPCMPEQB = VEX.NDS.128.66.0F.WIG 74 /r */
26180 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26181 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26182 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x16 );
26183 goto decode_success;
26185 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
26186 /* VPCMPEQB = VEX.NDS.256.66.0F.WIG 74 /r */
26187 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26188 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26189 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x32 );
26190 goto decode_success;
26192 break;
26194 case 0x75:
26195 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
26196 /* VPCMPEQW = VEX.NDS.128.66.0F.WIG 75 /r */
26197 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26198 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26199 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x8 );
26200 goto decode_success;
26202 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
26203 /* VPCMPEQW = VEX.NDS.256.66.0F.WIG 75 /r */
26204 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26205 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26206 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x16 );
26207 goto decode_success;
26209 break;
26211 case 0x76:
26212 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
26213 /* VPCMPEQD = VEX.NDS.128.66.0F.WIG 76 /r */
26214 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26215 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26216 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x4 );
26217 goto decode_success;
26219 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
26220 /* VPCMPEQD = VEX.NDS.256.66.0F.WIG 76 /r */
26221 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26222 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26223 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x8 );
26224 goto decode_success;
26226 break;
26228 case 0x77:
26229 /* VZEROUPPER = VEX.128.0F.WIG 77 */
26230 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26231 Int i;
26232 IRTemp zero128 = newTemp(Ity_V128);
26233 assign(zero128, mkV128(0));
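            /* VZEROUPPER clears bits 255:128 (lane 1) of ymm0..ymm15 and
               leaves the low 128 bits of each register untouched. */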
26234 for (i = 0; i < 16; i++) {
26235 putYMMRegLane128(i, 1, mkexpr(zero128));
26237 DIP("vzeroupper\n");
26238 goto decode_success;
26240 /* VZEROALL = VEX.256.0F.WIG 77 */
26241 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26242 Int i;
26243 IRTemp zero128 = newTemp(Ity_V128);
26244 assign(zero128, mkV128(0));
26245 for (i = 0; i < 16; i++) {
26246 putYMMRegLoAndZU(i, mkexpr(zero128));
26248 DIP("vzeroall\n");
26249 goto decode_success;
26251 break;
26253 case 0x7C:
26254 case 0x7D:
26255 /* VHADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7C /r */
26256 /* VHSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7D /r */
26257 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26258 IRTemp sV = newTemp(Ity_V128);
26259 IRTemp dV = newTemp(Ity_V128);
26260 Bool isAdd = opc == 0x7C;
26261 const HChar* str = isAdd ? "add" : "sub";
26262 UChar modrm = getUChar(delta);
26263 UInt rG = gregOfRexRM(pfx,modrm);
26264 UInt rV = getVexNvvvv(pfx);
26265 if (epartIsReg(modrm)) {
26266 UInt rE = eregOfRexRM(pfx,modrm);
26267 assign( sV, getXMMReg(rE) );
26268             DIP("vh%sps %s,%s,%s\n", str, nameXMMReg(rE),
26269 nameXMMReg(rV), nameXMMReg(rG));
26270 delta += 1;
26271 } else {
26272 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26273 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
26274             DIP("vh%sps %s,%s,%s\n", str, dis_buf,
26275 nameXMMReg(rV), nameXMMReg(rG));
26276 delta += alen;
26278 assign( dV, getXMMReg(rV) );
26279 putYMMRegLoAndZU( rG, mkexpr( math_HADDPS_128 ( dV, sV, isAdd ) ) );
26280 *uses_vvvv = True;
26281 goto decode_success;
26283 /* VHADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7C /r */
26284 /* VHSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7D /r */
26285 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26286 IRTemp sV = newTemp(Ity_V256);
26287 IRTemp dV = newTemp(Ity_V256);
26288 IRTemp s1, s0, d1, d0;
26289 Bool isAdd = opc == 0x7C;
26290 const HChar* str = isAdd ? "add" : "sub";
26291 UChar modrm = getUChar(delta);
26292 UInt rG = gregOfRexRM(pfx,modrm);
26293 UInt rV = getVexNvvvv(pfx);
26294 s1 = s0 = d1 = d0 = IRTemp_INVALID;
26295 if (epartIsReg(modrm)) {
26296 UInt rE = eregOfRexRM(pfx,modrm);
26297 assign( sV, getYMMReg(rE) );
26298             DIP("vh%sps %s,%s,%s\n", str, nameYMMReg(rE),
26299 nameYMMReg(rV), nameYMMReg(rG));
26300 delta += 1;
26301 } else {
26302 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26303 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
26304             DIP("vh%sps %s,%s,%s\n", str, dis_buf,
26305 nameYMMReg(rV), nameYMMReg(rG));
26306 delta += alen;
26308 assign( dV, getYMMReg(rV) );
26309 breakupV256toV128s( dV, &d1, &d0 );
26310 breakupV256toV128s( sV, &s1, &s0 );
26311 putYMMReg( rG, binop(Iop_V128HLtoV256,
26312 mkexpr( math_HADDPS_128 ( d1, s1, isAdd ) ),
26313 mkexpr( math_HADDPS_128 ( d0, s0, isAdd ) ) ) );
26314 *uses_vvvv = True;
26315 goto decode_success;
26317 /* VHADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7C /r */
26318 /* VHSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7D /r */
26319 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26320 IRTemp sV = newTemp(Ity_V128);
26321 IRTemp dV = newTemp(Ity_V128);
26322 Bool isAdd = opc == 0x7C;
26323 const HChar* str = isAdd ? "add" : "sub";
26324 UChar modrm = getUChar(delta);
26325 UInt rG = gregOfRexRM(pfx,modrm);
26326 UInt rV = getVexNvvvv(pfx);
26327 if (epartIsReg(modrm)) {
26328 UInt rE = eregOfRexRM(pfx,modrm);
26329 assign( sV, getXMMReg(rE) );
26330 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE),
26331 nameXMMReg(rV), nameXMMReg(rG));
26332 delta += 1;
26333 } else {
26334 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26335 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
26336 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
26337 nameXMMReg(rV), nameXMMReg(rG));
26338 delta += alen;
26340 assign( dV, getXMMReg(rV) );
26341 putYMMRegLoAndZU( rG, mkexpr( math_HADDPD_128 ( dV, sV, isAdd ) ) );
26342 *uses_vvvv = True;
26343 goto decode_success;
26345 /* VHADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7C /r */
26346 /* VHSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7D /r */
26347 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26348 IRTemp sV = newTemp(Ity_V256);
26349 IRTemp dV = newTemp(Ity_V256);
26350 IRTemp s1, s0, d1, d0;
26351 Bool isAdd = opc == 0x7C;
26352 const HChar* str = isAdd ? "add" : "sub";
26353 UChar modrm = getUChar(delta);
26354 UInt rG = gregOfRexRM(pfx,modrm);
26355 UInt rV = getVexNvvvv(pfx);
26356 s1 = s0 = d1 = d0 = IRTemp_INVALID;
26357 if (epartIsReg(modrm)) {
26358 UInt rE = eregOfRexRM(pfx,modrm);
26359 assign( sV, getYMMReg(rE) );
26360 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE),
26361 nameYMMReg(rV), nameYMMReg(rG));
26362 delta += 1;
26363 } else {
26364 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26365 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
26366 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
26367 nameYMMReg(rV), nameYMMReg(rG));
26368 delta += alen;
26370 assign( dV, getYMMReg(rV) );
26371 breakupV256toV128s( dV, &d1, &d0 );
26372 breakupV256toV128s( sV, &s1, &s0 );
26373 putYMMReg( rG, binop(Iop_V128HLtoV256,
26374 mkexpr( math_HADDPD_128 ( d1, s1, isAdd ) ),
26375 mkexpr( math_HADDPD_128 ( d0, s0, isAdd ) ) ) );
26376 *uses_vvvv = True;
26377 goto decode_success;
26379 break;
26381 case 0x7E:
26382 /* Note the Intel docs don't make sense for this. I think they
26383 are wrong. They seem to imply it is a store when in fact I
26384 think it is a load. Also it's unclear whether this is W0, W1
26385 or WIG. */
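         /* What is implemented below is a 64-bit load (or register copy)
            into the low lane of xmm1 with bits 255:64 zeroed, which matches
            the architectural behaviour of VMOVQ xmm1, xmm2/m64 (the
            F3 0F 7E encoding). */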
26386 /* VMOVQ xmm2/m64, xmm1 = VEX.128.F3.0F.W0 7E /r */
26387 if (haveF3no66noF2(pfx)
26388 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
26389 vassert(sz == 4); /* even tho we are transferring 8, not 4. */
26390 UChar modrm = getUChar(delta);
26391 UInt rG = gregOfRexRM(pfx,modrm);
26392 if (epartIsReg(modrm)) {
26393 UInt rE = eregOfRexRM(pfx,modrm);
26394 putXMMRegLane64( rG, 0, getXMMRegLane64( rE, 0 ));
26395 DIP("vmovq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
26396 delta += 1;
26397 } else {
26398 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26399 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
26400 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
26401 delta += alen;
26403 /* zero bits 255:64 */
26404 putXMMRegLane64( rG, 1, mkU64(0) );
26405 putYMMRegLane128( rG, 1, mkV128(0) );
26406 goto decode_success;
26408 /* VMOVQ xmm1, r64 = VEX.128.66.0F.W1 7E /r (reg case only) */
26409 /* Moves from G to E, so is a store-form insn */
26410 /* Intel docs list this in the VMOVD entry for some reason. */
26411 if (have66noF2noF3(pfx)
26412 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
26413 UChar modrm = getUChar(delta);
26414 UInt rG = gregOfRexRM(pfx,modrm);
26415 if (epartIsReg(modrm)) {
26416 UInt rE = eregOfRexRM(pfx,modrm);
26417 DIP("vmovq %s,%s\n", nameXMMReg(rG), nameIReg64(rE));
26418 putIReg64(rE, getXMMRegLane64(rG, 0));
26419 delta += 1;
26420 } else {
26421 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26422 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0) );
26423 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
26424 delta += alen;
26426 goto decode_success;
26428 /* VMOVD xmm1, m32/r32 = VEX.128.66.0F.W0 7E /r (reg case only) */
26429 /* Moves from G to E, so is a store-form insn */
26430 if (have66noF2noF3(pfx)
26431 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
26432 UChar modrm = getUChar(delta);
26433 UInt rG = gregOfRexRM(pfx,modrm);
26434 if (epartIsReg(modrm)) {
26435 UInt rE = eregOfRexRM(pfx,modrm);
26436 DIP("vmovd %s,%s\n", nameXMMReg(rG), nameIReg32(rE));
26437 putIReg32(rE, getXMMRegLane32(rG, 0));
26438 delta += 1;
26439 } else {
26440 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26441 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0) );
26442 DIP("vmovd %s,%s\n", dis_buf, nameXMMReg(rG));
26443 delta += alen;
26445 goto decode_success;
26447 break;
26449 case 0x7F:
26450 /* VMOVDQA ymm1, ymm2/m256 = VEX.256.66.0F.WIG 7F */
26451 /* VMOVDQU ymm1, ymm2/m256 = VEX.256.F3.0F.WIG 7F */
26452 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
26453 && 1==getVexL(pfx)/*256*/) {
26454 UChar modrm = getUChar(delta);
26455 UInt rS = gregOfRexRM(pfx, modrm);
26456 IRTemp tS = newTemp(Ity_V256);
26457 Bool isA = have66noF2noF3(pfx);
26458 HChar ch = isA ? 'a' : 'u';
26459 assign(tS, getYMMReg(rS));
26460 if (epartIsReg(modrm)) {
26461 UInt rD = eregOfRexRM(pfx, modrm);
26462 delta += 1;
26463 putYMMReg(rD, mkexpr(tS));
26464 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
26465 } else {
26466 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26467 delta += alen;
26468 if (isA)
26469 gen_SEGV_if_not_32_aligned(addr);
26470 storeLE(mkexpr(addr), mkexpr(tS));
26471 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), dis_buf);
26473 goto decode_success;
26475 /* VMOVDQA xmm1, xmm2/m128 = VEX.128.66.0F.WIG 7F */
26476 /* VMOVDQU xmm1, xmm2/m128 = VEX.128.F3.0F.WIG 7F */
26477 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
26478 && 0==getVexL(pfx)/*128*/) {
26479 UChar modrm = getUChar(delta);
26480 UInt rS = gregOfRexRM(pfx, modrm);
26481 IRTemp tS = newTemp(Ity_V128);
26482 Bool isA = have66noF2noF3(pfx);
26483 HChar ch = isA ? 'a' : 'u';
26484 assign(tS, getXMMReg(rS));
26485 if (epartIsReg(modrm)) {
26486 UInt rD = eregOfRexRM(pfx, modrm);
26487 delta += 1;
26488 putYMMRegLoAndZU(rD, mkexpr(tS));
26489 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
26490 } else {
26491 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26492 delta += alen;
26493 if (isA)
26494 gen_SEGV_if_not_16_aligned(addr);
26495 storeLE(mkexpr(addr), mkexpr(tS));
26496 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), dis_buf);
26498 goto decode_success;
26500 break;
26502 case 0xAE:
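         /* 0F AE here encodes the MXCSR accessors: the ModRM reg field picks
            the operation -- /3 is VSTMXCSR m32 (store MXCSR) and /2 is
            VLDMXCSR m32 (load MXCSR).  Both require a memory operand and
            VEX.L == 0, as checked below. */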
26503 /* VSTMXCSR m32 = VEX.LZ.0F.WIG AE /3 */
26504 if (haveNo66noF2noF3(pfx)
26505 && 0==getVexL(pfx)/*LZ*/
26506 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
26507 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
26508 && sz == 4) {
26509 delta = dis_STMXCSR(vbi, pfx, delta, True/*isAvx*/);
26510 goto decode_success;
26512 /* VLDMXCSR m32 = VEX.LZ.0F.WIG AE /2 */
26513 if (haveNo66noF2noF3(pfx)
26514 && 0==getVexL(pfx)/*LZ*/
26515 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
26516 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
26517 && sz == 4) {
26518 delta = dis_LDMXCSR(vbi, pfx, delta, True/*isAvx*/);
26519 goto decode_success;
26521 break;
26523 case 0xC2:
26524 /* VCMPSD xmm3/m64(E=argL), xmm2(V=argR), xmm1(G) */
26525 /* = VEX.NDS.LIG.F2.0F.WIG C2 /r ib */
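         /* For every variant of this opcode the cmp helper evidently returns
            delta unchanged when it declines to decode (for instance, an
            immediate it does not handle), so "delta > delta0" is the success
            test and anything else falls through to the next candidate. */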
26526 if (haveF2no66noF3(pfx)) {
26527 Long delta0 = delta;
26528 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26529 "vcmpsd", False/*!all_lanes*/,
26530 8/*sz*/);
26531 if (delta > delta0) goto decode_success;
26532 /* else fall through -- decoding has failed */
26534 /* VCMPSS xmm3/m32(E=argL), xmm2(V=argR), xmm1(G) */
26535 /* = VEX.NDS.LIG.F3.0F.WIG C2 /r ib */
26536 if (haveF3no66noF2(pfx)) {
26537 Long delta0 = delta;
26538 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26539 "vcmpss", False/*!all_lanes*/,
26540 4/*sz*/);
26541 if (delta > delta0) goto decode_success;
26542 /* else fall through -- decoding has failed */
26544 /* VCMPPD xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
26545 /* = VEX.NDS.128.66.0F.WIG C2 /r ib */
26546 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26547 Long delta0 = delta;
26548 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26549 "vcmppd", True/*all_lanes*/,
26550 8/*sz*/);
26551 if (delta > delta0) goto decode_success;
26552 /* else fall through -- decoding has failed */
26554 /* VCMPPD ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
26555 /* = VEX.NDS.256.66.0F.WIG C2 /r ib */
26556 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26557 Long delta0 = delta;
26558 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26559 "vcmppd", 8/*sz*/);
26560 if (delta > delta0) goto decode_success;
26561 /* else fall through -- decoding has failed */
26563 /* VCMPPS xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
26564 /* = VEX.NDS.128.0F.WIG C2 /r ib */
26565 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26566 Long delta0 = delta;
26567 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26568 "vcmpps", True/*all_lanes*/,
26569 4/*sz*/);
26570 if (delta > delta0) goto decode_success;
26571 /* else fall through -- decoding has failed */
26573 /* VCMPPS ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
26574 /* = VEX.NDS.256.0F.WIG C2 /r ib */
26575 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26576 Long delta0 = delta;
26577 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26578 "vcmpps", 4/*sz*/);
26579 if (delta > delta0) goto decode_success;
26580 /* else fall through -- decoding has failed */
26582 break;
26584 case 0xC4:
26585 /* VPINSRW r32/m16, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG C4 /r ib */
26586 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26587 UChar modrm = getUChar(delta);
26588 UInt rG = gregOfRexRM(pfx, modrm);
26589 UInt rV = getVexNvvvv(pfx);
26590 Int imm8;
26591 IRTemp new16 = newTemp(Ity_I16);
26593 if ( epartIsReg( modrm ) ) {
26594 imm8 = (Int)(getUChar(delta+1) & 7);
26595 assign( new16, unop(Iop_32to16,
26596 getIReg32(eregOfRexRM(pfx,modrm))) );
26597 delta += 1+1;
26598 DIP( "vpinsrw $%d,%s,%s\n", imm8,
26599 nameIReg32( eregOfRexRM(pfx, modrm) ), nameXMMReg(rG) );
26600 } else {
26601 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
26602 imm8 = (Int)(getUChar(delta+alen) & 7);
26603 assign( new16, loadLE( Ity_I16, mkexpr(addr) ));
26604 delta += alen+1;
26605 DIP( "vpinsrw $%d,%s,%s\n",
26606 imm8, dis_buf, nameXMMReg(rG) );
26609 IRTemp src_vec = newTemp(Ity_V128);
26610 assign(src_vec, getXMMReg( rV ));
26611 IRTemp res_vec = math_PINSRW_128( src_vec, new16, imm8 );
26612 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
26613 *uses_vvvv = True;
26614 goto decode_success;
26616 break;
26618 case 0xC5:
26619 /* VPEXTRW imm8, xmm1, reg32 = VEX.128.66.0F.W0 C5 /r ib */
26620 if (have66noF2noF3(pfx)
26621 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
26622 Long delta0 = delta;
26623 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
26624 True/*isAvx*/ );
26625 if (delta > delta0) goto decode_success;
26626 /* else fall through -- decoding has failed */
26628 break;
26630 case 0xC6:
26631       /* VSHUFPS imm8, xmm3/m128, xmm2, xmm1 */
26632 /* = VEX.NDS.128.0F.WIG C6 /r ib */
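         /* Lane selection follows the architectural SHUFPS rule: result
            lanes 0 and 1 are chosen from the first (vvvv) source by
            imm8[1:0] and imm8[3:2], and lanes 2 and 3 from the second (E)
            source by imm8[5:4] and imm8[7:6]; math_SHUFPS_128 below is
            assumed to implement exactly this selection. */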
26633 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26634 Int imm8 = 0;
26635 IRTemp eV = newTemp(Ity_V128);
26636 IRTemp vV = newTemp(Ity_V128);
26637 UInt modrm = getUChar(delta);
26638 UInt rG = gregOfRexRM(pfx,modrm);
26639 UInt rV = getVexNvvvv(pfx);
26640 assign( vV, getXMMReg(rV) );
26641 if (epartIsReg(modrm)) {
26642 UInt rE = eregOfRexRM(pfx,modrm);
26643 assign( eV, getXMMReg(rE) );
26644 imm8 = (Int)getUChar(delta+1);
26645 delta += 1+1;
26646 DIP("vshufps $%d,%s,%s,%s\n",
26647 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
26648 } else {
26649 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26650 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
26651 imm8 = (Int)getUChar(delta+alen);
26652 delta += 1+alen;
26653 DIP("vshufps $%d,%s,%s,%s\n",
26654 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
26656 IRTemp res = math_SHUFPS_128( eV, vV, imm8 );
26657 putYMMRegLoAndZU( rG, mkexpr(res) );
26658 *uses_vvvv = True;
26659 goto decode_success;
26661       /* VSHUFPS imm8, ymm3/m256, ymm2, ymm1 */
26662 /* = VEX.NDS.256.0F.WIG C6 /r ib */
26663 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26664 Int imm8 = 0;
26665 IRTemp eV = newTemp(Ity_V256);
26666 IRTemp vV = newTemp(Ity_V256);
26667 UInt modrm = getUChar(delta);
26668 UInt rG = gregOfRexRM(pfx,modrm);
26669 UInt rV = getVexNvvvv(pfx);
26670 assign( vV, getYMMReg(rV) );
26671 if (epartIsReg(modrm)) {
26672 UInt rE = eregOfRexRM(pfx,modrm);
26673 assign( eV, getYMMReg(rE) );
26674 imm8 = (Int)getUChar(delta+1);
26675 delta += 1+1;
26676 DIP("vshufps $%d,%s,%s,%s\n",
26677 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
26678 } else {
26679 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26680 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
26681 imm8 = (Int)getUChar(delta+alen);
26682 delta += 1+alen;
26683 DIP("vshufps $%d,%s,%s,%s\n",
26684 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
26686 IRTemp res = math_SHUFPS_256( eV, vV, imm8 );
26687 putYMMReg( rG, mkexpr(res) );
26688 *uses_vvvv = True;
26689 goto decode_success;
26691       /* VSHUFPD imm8, xmm3/m128, xmm2, xmm1 */
26692 /* = VEX.NDS.128.66.0F.WIG C6 /r ib */
26693 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26694 Int imm8 = 0;
26695 IRTemp eV = newTemp(Ity_V128);
26696 IRTemp vV = newTemp(Ity_V128);
26697 UInt modrm = getUChar(delta);
26698 UInt rG = gregOfRexRM(pfx,modrm);
26699 UInt rV = getVexNvvvv(pfx);
26700 assign( vV, getXMMReg(rV) );
26701 if (epartIsReg(modrm)) {
26702 UInt rE = eregOfRexRM(pfx,modrm);
26703 assign( eV, getXMMReg(rE) );
26704 imm8 = (Int)getUChar(delta+1);
26705 delta += 1+1;
26706 DIP("vshufpd $%d,%s,%s,%s\n",
26707 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
26708 } else {
26709 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26710 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
26711 imm8 = (Int)getUChar(delta+alen);
26712 delta += 1+alen;
26713 DIP("vshufpd $%d,%s,%s,%s\n",
26714 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
26716 IRTemp res = math_SHUFPD_128( eV, vV, imm8 );
26717 putYMMRegLoAndZU( rG, mkexpr(res) );
26718 *uses_vvvv = True;
26719 goto decode_success;
26721       /* VSHUFPD imm8, ymm3/m256, ymm2, ymm1 */
26722 /* = VEX.NDS.256.66.0F.WIG C6 /r ib */
26723 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26724 Int imm8 = 0;
26725 IRTemp eV = newTemp(Ity_V256);
26726 IRTemp vV = newTemp(Ity_V256);
26727 UInt modrm = getUChar(delta);
26728 UInt rG = gregOfRexRM(pfx,modrm);
26729 UInt rV = getVexNvvvv(pfx);
26730 assign( vV, getYMMReg(rV) );
26731 if (epartIsReg(modrm)) {
26732 UInt rE = eregOfRexRM(pfx,modrm);
26733 assign( eV, getYMMReg(rE) );
26734 imm8 = (Int)getUChar(delta+1);
26735 delta += 1+1;
26736 DIP("vshufpd $%d,%s,%s,%s\n",
26737 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
26738 } else {
26739 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26740 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
26741 imm8 = (Int)getUChar(delta+alen);
26742 delta += 1+alen;
26743 DIP("vshufpd $%d,%s,%s,%s\n",
26744 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
26746 IRTemp res = math_SHUFPD_256( eV, vV, imm8 );
26747 putYMMReg( rG, mkexpr(res) );
26748 *uses_vvvv = True;
26749 goto decode_success;
26751 break;
26753 case 0xD0:
26754 /* VADDSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D0 /r */
26755 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26756 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26757 uses_vvvv, vbi, pfx, delta,
26758 "vaddsubpd", math_ADDSUBPD_128 );
26759 goto decode_success;
26761 /* VADDSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D0 /r */
26762 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26763 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26764 uses_vvvv, vbi, pfx, delta,
26765 "vaddsubpd", math_ADDSUBPD_256 );
26766 goto decode_success;
26768 /* VADDSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG D0 /r */
26769 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26770 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26771 uses_vvvv, vbi, pfx, delta,
26772 "vaddsubps", math_ADDSUBPS_128 );
26773 goto decode_success;
26775 /* VADDSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG D0 /r */
26776 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26777 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26778 uses_vvvv, vbi, pfx, delta,
26779 "vaddsubps", math_ADDSUBPS_256 );
26780 goto decode_success;
26782 break;
26784 case 0xD1:
26785 /* VPSRLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D1 /r */
26786 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26787 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26788 "vpsrlw", Iop_ShrN16x8 );
26789 *uses_vvvv = True;
26790 goto decode_success;
26793 /* VPSRLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D1 /r */
26794 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26795 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26796 "vpsrlw", Iop_ShrN16x16 );
26797 *uses_vvvv = True;
26798 goto decode_success;
26801 break;
26803 case 0xD2:
26804 /* VPSRLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D2 /r */
26805 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26806 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26807 "vpsrld", Iop_ShrN32x4 );
26808 *uses_vvvv = True;
26809 goto decode_success;
26811 /* VPSRLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D2 /r */
26812 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26813 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26814 "vpsrld", Iop_ShrN32x8 );
26815 *uses_vvvv = True;
26816 goto decode_success;
26818 break;
26820 case 0xD3:
26821 /* VPSRLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D3 /r */
26822 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26823 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26824 "vpsrlq", Iop_ShrN64x2 );
26825 *uses_vvvv = True;
26826 goto decode_success;
26828 /* VPSRLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D3 /r */
26829 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26830 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26831 "vpsrlq", Iop_ShrN64x4 );
26832 *uses_vvvv = True;
26833 goto decode_success;
26835 break;
26837 case 0xD4:
26838 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
26839 /* VPADDQ = VEX.NDS.128.66.0F.WIG D4 /r */
26840 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26841 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26842 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x2 );
26843 goto decode_success;
26845 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
26846 /* VPADDQ = VEX.NDS.256.66.0F.WIG D4 /r */
26847 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26848 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26849 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x4 );
26850 goto decode_success;
26852 break;
26854 case 0xD5:
26855 /* VPMULLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D5 /r */
26856 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26857 delta = dis_AVX128_E_V_to_G(
26858 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x8 );
26859 goto decode_success;
26861 /* VPMULLW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D5 /r */
26862 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26863 delta = dis_AVX256_E_V_to_G(
26864 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x16 );
26865 goto decode_success;
26867 break;
26869 case 0xD6:
26870 /* I can't even find any Intel docs for this one. */
26871 /* Basically: 66 0F D6 = MOVQ -- move 64 bits from G (lo half
26872 xmm) to E (mem or lo half xmm). Looks like L==0(128), W==0
26873 (WIG, maybe?) */
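         /* For reference, current SDM editions do list this as (V)MOVQ
            xmm2/m64, xmm1 (66 0F D6), i.e. a store of the low 64 bits of G
            to E.  Only the memory-destination form is decoded below; the
            register case is left undecoded until a test case turns up. */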
26874 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
26875 && 0==getRexW(pfx)/*this might be redundant, dunno*/) {
26876 UChar modrm = getUChar(delta);
26877 UInt rG = gregOfRexRM(pfx,modrm);
26878 if (epartIsReg(modrm)) {
26879 /* fall through, awaiting test case */
26880 /* dst: lo half copied, hi half zeroed */
26881 } else {
26882 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26883 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0 ));
26884 DIP("vmovq %s,%s\n", nameXMMReg(rG), dis_buf );
26885 delta += alen;
26886 goto decode_success;
26889 break;
26891 case 0xD7:
26892 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB xmm1, r32 */
26893 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26894 delta = dis_PMOVMSKB_128( vbi, pfx, delta, True/*isAvx*/ );
26895 goto decode_success;
26897       /* VEX.256.66.0F.WIG D7 /r = VPMOVMSKB ymm1, r32 */
26898 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26899 delta = dis_PMOVMSKB_256( vbi, pfx, delta );
26900 goto decode_success;
26902 break;
26904 case 0xD8:
26905 /* VPSUBUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D8 /r */
26906 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26907 delta = dis_AVX128_E_V_to_G(
26908 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux16 );
26909 goto decode_success;
26911 /* VPSUBUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D8 /r */
26912 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26913 delta = dis_AVX256_E_V_to_G(
26914 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux32 );
26915 goto decode_success;
26917 break;
26919 case 0xD9:
26920 /* VPSUBUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D9 /r */
26921 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26922 delta = dis_AVX128_E_V_to_G(
26923 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux8 );
26924 goto decode_success;
26926 /* VPSUBUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D9 /r */
26927 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26928 delta = dis_AVX256_E_V_to_G(
26929 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux16 );
26930 goto decode_success;
26932 break;
26934 case 0xDA:
26935 /* VPMINUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DA /r */
26936 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26937 delta = dis_AVX128_E_V_to_G(
26938 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux16 );
26939 goto decode_success;
26941 /* VPMINUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DA /r */
26942 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26943 delta = dis_AVX256_E_V_to_G(
26944 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux32 );
26945 goto decode_success;
26947 break;
26949 case 0xDB:
26950 /* VPAND r/m, rV, r ::: r = rV & r/m */
26951 /* VEX.NDS.128.66.0F.WIG DB /r = VPAND xmm3/m128, xmm2, xmm1 */
26952 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26953 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26954 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV128 );
26955 goto decode_success;
26957 /* VPAND r/m, rV, r ::: r = rV & r/m */
26958 /* VEX.NDS.256.66.0F.WIG DB /r = VPAND ymm3/m256, ymm2, ymm1 */
26959 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26960 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26961 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV256 );
26962 goto decode_success;
26964 break;
26966 case 0xDC:
26967 /* VPADDUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DC /r */
26968 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26969 delta = dis_AVX128_E_V_to_G(
26970 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux16 );
26971 goto decode_success;
26973 /* VPADDUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DC /r */
26974 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26975 delta = dis_AVX256_E_V_to_G(
26976 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux32 );
26977 goto decode_success;
26979 break;
26981 case 0xDD:
26982 /* VPADDUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DD /r */
26983 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26984 delta = dis_AVX128_E_V_to_G(
26985 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux8 );
26986 goto decode_success;
26988 /* VPADDUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DD /r */
26989 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26990 delta = dis_AVX256_E_V_to_G(
26991 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux16 );
26992 goto decode_success;
26994 break;
26996 case 0xDE:
26997 /* VPMAXUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DE /r */
26998 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26999 delta = dis_AVX128_E_V_to_G(
27000 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux16 );
27001 goto decode_success;
27003 /* VPMAXUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DE /r */
27004 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27005 delta = dis_AVX256_E_V_to_G(
27006 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux32 );
27007 goto decode_success;
27009 break;
27011 case 0xDF:
27012 /* VPANDN r/m, rV, r ::: r = rV & ~r/m (is that correct, re the ~ ?) */
27013 /* VEX.NDS.128.66.0F.WIG DF /r = VPANDN xmm3/m128, xmm2, xmm1 */
27014 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27015 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
27016 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV128,
27017 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
27018 goto decode_success;
27020 /* VPANDN r/m, rV, r ::: r = rV & ~r/m (is that correct, re the ~ ?) */
27021 /* VEX.NDS.256.66.0F.WIG DF /r = VPANDN ymm3/m256, ymm2, ymm1 */
27022 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27023 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
27024 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV256,
27025 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
27026 goto decode_success;
27028 break;
27030 case 0xE0:
27031 /* VPAVGB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E0 /r */
27032 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27033 delta = dis_AVX128_E_V_to_G(
27034 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux16 );
27035 goto decode_success;
27037 /* VPAVGB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E0 /r */
27038 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27039 delta = dis_AVX256_E_V_to_G(
27040 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux32 );
27041 goto decode_success;
27043 break;
27045 case 0xE1:
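/* Note for 0xE1/0xE2 and the 0xF1..0xF3 cases below: even in the 256-bit
   forms the shift count is taken from an xmm register or m128 operand,
   which is why the per-form comments read "xmm3/m128, ymm2, ymm1". */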
27046 /* VPSRAW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E1 /r */
27047 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27048 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27049 "vpsraw", Iop_SarN16x8 );
27050 *uses_vvvv = True;
27051 goto decode_success;
27053 /* VPSRAW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E1 /r */
27054 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27055 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27056 "vpsraw", Iop_SarN16x16 );
27057 *uses_vvvv = True;
27058 goto decode_success;
27060 break;
27062 case 0xE2:
27063 /* VPSRAD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E2 /r */
27064 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27065 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27066 "vpsrad", Iop_SarN32x4 );
27067 *uses_vvvv = True;
27068 goto decode_success;
27070 /* VPSRAD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E2 /r */
27071 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27072 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27073 "vpsrad", Iop_SarN32x8 );
27074 *uses_vvvv = True;
27075 goto decode_success;
27077 break;
27079 case 0xE3:
27080 /* VPAVGW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E3 /r */
27081 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27082 delta = dis_AVX128_E_V_to_G(
27083 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux8 );
27084 goto decode_success;
27086 /* VPAVGW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E3 /r */
27087 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27088 delta = dis_AVX256_E_V_to_G(
27089 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux16 );
27090 goto decode_success;
27092 break;
27094 case 0xE4:
27095 /* VPMULHUW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E4 /r */
27096 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27097 delta = dis_AVX128_E_V_to_G(
27098 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux8 );
27099 goto decode_success;
27101 /* VPMULHUW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E4 /r */
27102 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27103 delta = dis_AVX256_E_V_to_G(
27104 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux16 );
27105 goto decode_success;
27107 break;
27109 case 0xE5:
27110 /* VPMULHW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E5 /r */
27111 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27112 delta = dis_AVX128_E_V_to_G(
27113 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx8 );
27114 goto decode_success;
27116 /* VPMULHW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E5 /r */
27117 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27118 delta = dis_AVX256_E_V_to_G(
27119 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx16 );
27120 goto decode_success;
27122 break;
27124 case 0xE6:
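/* Opcode E6 carries three different conversions, distinguished only by the
   SIMD prefix: F3 = VCVTDQ2PD, 66 = VCVTTPD2DQ (truncating, hence
   r2zero=True below) and F2 = VCVTPD2DQ (current rounding mode). */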
27125 /* VCVTDQ2PD xmm2/m64, xmm1 = VEX.128.F3.0F.WIG E6 /r */
27126 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
27127 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, True/*isAvx*/);
27128 goto decode_success;
27130 /* VCVTDQ2PD xmm2/m128, ymm1 = VEX.256.F3.0F.WIG E6 /r */
27131 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
27132 delta = dis_CVTDQ2PD_256(vbi, pfx, delta);
27133 goto decode_success;
27135 /* VCVTTPD2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG E6 /r */
27136 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27137 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
27138 True/*r2zero*/);
27139 goto decode_success;
27141 /* VCVTTPD2DQ ymm2/m256, xmm1 = VEX.256.66.0F.WIG E6 /r */
27142 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27143 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, True/*r2zero*/);
27144 goto decode_success;
27146 /* VCVTPD2DQ xmm2/m128, xmm1 = VEX.128.F2.0F.WIG E6 /r */
27147 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27148 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
27149 False/*!r2zero*/);
27150 goto decode_success;
27152 /* VCVTPD2DQ ymm2/m256, xmm1 = VEX.256.F2.0F.WIG E6 /r */
27153 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27154 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, False/*!r2zero*/);
27155 goto decode_success;
27157 break;
27159 case 0xE7:
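/* VMOVNTDQ is a non-temporal store; the streaming hint is simply ignored
   here and it is translated as an ordinary store, but the architectural
   16/32-byte alignment requirement is enforced via
   gen_SEGV_if_not_*_aligned.  Register forms are invalid and fall through
   to decode failure. */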
27160 /* VMOVNTDQ xmm1, m128 = VEX.128.66.0F.WIG E7 /r */
27161 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27162 UChar modrm = getUChar(delta);
27163 UInt rG = gregOfRexRM(pfx,modrm);
27164 if (!epartIsReg(modrm)) {
27165 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27166 gen_SEGV_if_not_16_aligned( addr );
27167 storeLE( mkexpr(addr), getXMMReg(rG) );
27168 DIP("vmovntdq %s,%s\n", dis_buf, nameXMMReg(rG));
27169 delta += alen;
27170 goto decode_success;
27172 /* else fall through */
27174 /* VMOVNTDQ ymm1, m256 = VEX.256.66.0F.WIG E7 /r */
27175 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27176 UChar modrm = getUChar(delta);
27177 UInt rG = gregOfRexRM(pfx,modrm);
27178 if (!epartIsReg(modrm)) {
27179 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27180 gen_SEGV_if_not_32_aligned( addr );
27181 storeLE( mkexpr(addr), getYMMReg(rG) );
27182 DIP("vmovntdq %s,%s\n", dis_buf, nameYMMReg(rG));
27183 delta += alen;
27184 goto decode_success;
27186 /* else fall through */
27188 break;
27190 case 0xE8:
27191 /* VPSUBSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E8 /r */
27192 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27193 delta = dis_AVX128_E_V_to_G(
27194 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx16 );
27195 goto decode_success;
27197 /* VPSUBSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E8 /r */
27198 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27199 delta = dis_AVX256_E_V_to_G(
27200 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx32 );
27201 goto decode_success;
27203 break;
27205 case 0xE9:
27206 /* VPSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E9 /r */
27207 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27208 delta = dis_AVX128_E_V_to_G(
27209 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx8 );
27210 goto decode_success;
27212 /* VPSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E9 /r */
27213 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27214 delta = dis_AVX256_E_V_to_G(
27215 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx16 );
27216 goto decode_success;
27218 break;
27220 case 0xEA:
27221 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
27222 /* VPMINSW = VEX.NDS.128.66.0F.WIG EA /r */
27223 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27224 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27225 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx8 );
27226 goto decode_success;
27228 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
27229 /* VPMINSW = VEX.NDS.256.66.0F.WIG EA /r */
27230 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27231 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27232 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx16 );
27233 goto decode_success;
27235 break;
27237 case 0xEB:
27238 /* VPOR r/m, rV, r ::: r = rV | r/m */
27239 /* VPOR = VEX.NDS.128.66.0F.WIG EB /r */
27240 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27241 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27242 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV128 );
27243 goto decode_success;
27245 /* VPOR r/m, rV, r ::: r = rV | r/m */
27246 /* VPOR = VEX.NDS.256.66.0F.WIG EB /r */
27247 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27248 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27249 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV256 );
27250 goto decode_success;
27252 break;
27254 case 0xEC:
27255 /* VPADDSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG EC /r */
27256 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27257 delta = dis_AVX128_E_V_to_G(
27258 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx16 );
27259 goto decode_success;
27261 /* VPADDSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG EC /r */
27262 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27263 delta = dis_AVX256_E_V_to_G(
27264 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx32 );
27265 goto decode_success;
27267 break;
27269 case 0xED:
27270 /* VPADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG ED /r */
27271 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27272 delta = dis_AVX128_E_V_to_G(
27273 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx8 );
27274 goto decode_success;
27276 /* VPADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG ED /r */
27277 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27278 delta = dis_AVX256_E_V_to_G(
27279 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx16 );
27280 goto decode_success;
27282 break;
27284 case 0xEE:
27285 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
27286 /* VPMAXSW = VEX.NDS.128.66.0F.WIG EE /r */
27287 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27288 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27289 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx8 );
27290 goto decode_success;
27292 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
27293 /* VPMAXSW = VEX.NDS.256.66.0F.WIG EE /r */
27294 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27295 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27296 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx16 );
27297 goto decode_success;
27299 break;
27301 case 0xEF:
27302 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
27303 /* VPXOR = VEX.NDS.128.66.0F.WIG EF /r */
27304 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27305 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27306 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV128 );
27307 goto decode_success;
27309 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
27310 /* VPXOR = VEX.NDS.256.66.0F.WIG EF /r */
27311 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27312 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27313 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV256 );
27314 goto decode_success;
27316 break;
27318 case 0xF0:
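/* VLDDQU is an unaligned load: unlike VMOVNTDQ above there is no alignment
   check, and a register source is invalid, hence the epartIsReg(modrm)
   bail-out in both forms. */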
27319 /* VLDDQU m256, ymm1 = VEX.256.F2.0F.WIG F0 /r */
27320 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27321 UChar modrm = getUChar(delta);
27322 UInt rD = gregOfRexRM(pfx, modrm);
27323 IRTemp tD = newTemp(Ity_V256);
27324 if (epartIsReg(modrm)) break;
27325 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27326 delta += alen;
27327 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
27328 DIP("vlddqu %s,%s\n", dis_buf, nameYMMReg(rD));
27329 putYMMReg(rD, mkexpr(tD));
27330 goto decode_success;
27332 /* VLDDQU m128, xmm1 = VEX.128.F2.0F.WIG F0 /r */
27333 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27334 UChar modrm = getUChar(delta);
27335 UInt rD = gregOfRexRM(pfx, modrm);
27336 IRTemp tD = newTemp(Ity_V128);
27337 if (epartIsReg(modrm)) break;
27338 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27339 delta += alen;
27340 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
27341 DIP("vlddqu %s,%s\n", dis_buf, nameXMMReg(rD));
27342 putYMMRegLoAndZU(rD, mkexpr(tD));
27343 goto decode_success;
27345 break;
27347 case 0xF1:
27348 /* VPSLLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F1 /r */
27349 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27350 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27351 "vpsllw", Iop_ShlN16x8 );
27352 *uses_vvvv = True;
27353 goto decode_success;
27356 /* VPSLLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F1 /r */
27357 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27358 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27359 "vpsllw", Iop_ShlN16x16 );
27360 *uses_vvvv = True;
27361 goto decode_success;
27364 break;
27366 case 0xF2:
27367 /* VPSLLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F2 /r */
27368 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27369 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27370 "vpslld", Iop_ShlN32x4 );
27371 *uses_vvvv = True;
27372 goto decode_success;
27374 /* VPSLLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F2 /r */
27375 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27376 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27377 "vpslld", Iop_ShlN32x8 );
27378 *uses_vvvv = True;
27379 goto decode_success;
27381 break;
27383 case 0xF3:
27384 /* VPSLLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F3 /r */
27385 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27386 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27387 "vpsllq", Iop_ShlN64x2 );
27388 *uses_vvvv = True;
27389 goto decode_success;
27391 /* VPSLLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F3 /r */
27392 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27393 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27394 "vpsllq", Iop_ShlN64x4 );
27395 *uses_vvvv = True;
27396 goto decode_success;
27398 break;
27400 case 0xF4:
27401 /* VPMULUDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F4 /r */
27402 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27403 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27404 uses_vvvv, vbi, pfx, delta,
27405 "vpmuludq", math_PMULUDQ_128 );
27406 goto decode_success;
27408 /* VPMULUDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F4 /r */
27409 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27410 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27411 uses_vvvv, vbi, pfx, delta,
27412 "vpmuludq", math_PMULUDQ_256 );
27413 goto decode_success;
27415 break;
27417 case 0xF5:
27418 /* VPMADDWD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F5 /r */
27419 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27420 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27421 uses_vvvv, vbi, pfx, delta,
27422 "vpmaddwd", math_PMADDWD_128 );
27423 goto decode_success;
27425 /* VPMADDWD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F5 /r */
27426 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27427 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27428 uses_vvvv, vbi, pfx, delta,
27429 "vpmaddwd", math_PMADDWD_256 );
27430 goto decode_success;
27432 break;
27434 case 0xF6:
27435 /* VPSADBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F6 /r */
27436 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27437 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27438 uses_vvvv, vbi, pfx, delta,
27439 "vpsadbw", math_PSADBW_128 );
27440 goto decode_success;
27442 /* VPSADBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F6 /r */
27443 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27444 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27445 uses_vvvv, vbi, pfx, delta,
27446 "vpsadbw", math_PSADBW_256 );
27447 goto decode_success;
27449 break;
27451 case 0xF7:
27452 /* VMASKMOVDQU xmm2, xmm1 = VEX.128.66.0F.WIG F7 /r */
27453 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
27454 && epartIsReg(getUChar(delta))) {
27455 delta = dis_MASKMOVDQU( vbi, pfx, delta, True/*isAvx*/ );
27456 goto decode_success;
27458 break;
27460 case 0xF8:
27461 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
27462 /* VPSUBB = VEX.NDS.128.66.0F.WIG F8 /r */
27463 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27464 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27465 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x16 );
27466 goto decode_success;
27468 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
27469 /* VPSUBB = VEX.NDS.256.66.0F.WIG F8 /r */
27470 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27471 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27472 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x32 );
27473 goto decode_success;
27475 break;
27477 case 0xF9:
27478 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
27479 /* VPSUBW = VEX.NDS.128.66.0F.WIG F9 /r */
27480 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27481 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27482 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x8 );
27483 goto decode_success;
27485 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
27486 /* VPSUBW = VEX.NDS.256.66.0F.WIG F9 /r */
27487 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27488 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27489 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x16 );
27490 goto decode_success;
27492 break;
27494 case 0xFA:
27495 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
27496 /* VPSUBD = VEX.NDS.128.66.0F.WIG FA /r */
27497 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27498 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27499 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x4 );
27500 goto decode_success;
27502 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
27503 /* VPSUBD = VEX.NDS.256.66.0F.WIG FA /r */
27504 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27505 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27506 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x8 );
27507 goto decode_success;
27509 break;
27511 case 0xFB:
27512 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
27513 /* VPSUBQ = VEX.NDS.128.66.0F.WIG FB /r */
27514 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27515 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27516 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x2 );
27517 goto decode_success;
27519 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
27520 /* VPSUBQ = VEX.NDS.256.66.0F.WIG FB /r */
27521 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27522 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27523 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x4 );
27524 goto decode_success;
27526 break;
27528 case 0xFC:
27529 /* VPADDB r/m, rV, r ::: r = rV + r/m */
27530 /* VPADDB = VEX.NDS.128.66.0F.WIG FC /r */
27531 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27532 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27533 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x16 );
27534 goto decode_success;
27536 /* VPADDB r/m, rV, r ::: r = rV + r/m */
27537 /* VPADDB = VEX.NDS.256.66.0F.WIG FC /r */
27538 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27539 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27540 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x32 );
27541 goto decode_success;
27543 break;
27545 case 0xFD:
27546 /* VPADDW r/m, rV, r ::: r = rV + r/m */
27547 /* VPADDW = VEX.NDS.128.66.0F.WIG FD /r */
27548 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27549 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27550 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x8 );
27551 goto decode_success;
27553 /* VPADDW r/m, rV, r ::: r = rV + r/m */
27554 /* VPADDW = VEX.NDS.256.66.0F.WIG FD /r */
27555 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27556 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27557 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x16 );
27558 goto decode_success;
27560 break;
27562 case 0xFE:
27563 /* VPADDD r/m, rV, r ::: r = rV + r/m */
27564 /* VPADDD = VEX.NDS.128.66.0F.WIG FE /r */
27565 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27566 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27567 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x4 );
27568 goto decode_success;
27570 /* VPADDD r/m, rV, r ::: r = rV + r/m */
27571 /* VPADDD = VEX.NDS.256.66.0F.WIG FE /r */
27572 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27573 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27574 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x8 );
27575 goto decode_success;
27577 break;
27579 default:
27580 break;
27584 //decode_failure:
27585 return deltaIN;
27587 decode_success:
27588 return delta;
27592 /*------------------------------------------------------------*/
27593 /*--- ---*/
27594 /*--- Top-level post-escape decoders: dis_ESC_0F38__VEX ---*/
27595 /*--- ---*/
27596 /*------------------------------------------------------------*/
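/* The helpers below (math_PERMILPS_VAR_*, math_PERMILPD_VAR_*, math_VPERMD,
   dis_SHIFTX, dis_FMA, dis_VMASKMOV, dis_VGATHER) build the IR for the more
   involved VEX-encoded 0F38 instructions and are called from
   dis_ESC_0F38__VEX further down. */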
27598 static IRTemp math_PERMILPS_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
27600 /* In the control vector, zero out all but the bottom two bits of
27601 each 32-bit lane. */
27602 IRExpr* cv1 = binop(Iop_ShrN32x4,
27603 binop(Iop_ShlN32x4, mkexpr(ctrlV), mkU8(30)),
27604 mkU8(30));
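   /* After the shl-by-30 / shr-by-30 pair each 32-bit lane holds ctrl & 3;
      e.g. a control lane containing 6 becomes 2, so the Perm below selects
      data lane 2 for that result position. */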
27605 /* And use the resulting cleaned-up control vector as steering
27606 in a Perm operation. */
27607 IRTemp res = newTemp(Ity_V128);
27608 assign(res, binop(Iop_Perm32x4, mkexpr(dataV), cv1));
27609 return res;
27612 static IRTemp math_PERMILPS_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
27614 IRTemp dHi, dLo, cHi, cLo;
27615 dHi = dLo = cHi = cLo = IRTemp_INVALID;
27616 breakupV256toV128s( dataV, &dHi, &dLo );
27617 breakupV256toV128s( ctrlV, &cHi, &cLo );
27618 IRTemp rHi = math_PERMILPS_VAR_128( dHi, cHi );
27619 IRTemp rLo = math_PERMILPS_VAR_128( dLo, cLo );
27620 IRTemp res = newTemp(Ity_V256);
27621 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
27622 return res;
27625 static IRTemp math_PERMILPD_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
27627 /* No cleverness here .. */
27628 IRTemp dHi, dLo, cHi, cLo;
27629 dHi = dLo = cHi = cLo = IRTemp_INVALID;
27630 breakupV128to64s( dataV, &dHi, &dLo );
27631 breakupV128to64s( ctrlV, &cHi, &cLo );
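   /* Bit 1 of each 64-bit control lane selects the source qword for the
      corresponding result lane: 0 picks dLo, 1 picks dHi, matching the
      PERMILPD control encoding. */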
27632 IRExpr* rHi
27633 = IRExpr_ITE( unop(Iop_64to1,
27634 binop(Iop_Shr64, mkexpr(cHi), mkU8(1))),
27635 mkexpr(dHi), mkexpr(dLo) );
27636 IRExpr* rLo
27637 = IRExpr_ITE( unop(Iop_64to1,
27638 binop(Iop_Shr64, mkexpr(cLo), mkU8(1))),
27639 mkexpr(dHi), mkexpr(dLo) );
27640 IRTemp res = newTemp(Ity_V128);
27641 assign(res, binop(Iop_64HLtoV128, rHi, rLo));
27642 return res;
27645 static IRTemp math_PERMILPD_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
27647 IRTemp dHi, dLo, cHi, cLo;
27648 dHi = dLo = cHi = cLo = IRTemp_INVALID;
27649 breakupV256toV128s( dataV, &dHi, &dLo );
27650 breakupV256toV128s( ctrlV, &cHi, &cLo );
27651 IRTemp rHi = math_PERMILPD_VAR_128( dHi, cHi );
27652 IRTemp rLo = math_PERMILPD_VAR_128( dLo, cLo );
27653 IRTemp res = newTemp(Ity_V256);
27654 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
27655 return res;
27658 static IRTemp math_VPERMD ( IRTemp ctrlV, IRTemp dataV )
27660 /* In the control vector, zero out all but the bottom three bits of
27661 each 32-bit lane. */
27662 IRExpr* cv1 = binop(Iop_ShrN32x8,
27663 binop(Iop_ShlN32x8, mkexpr(ctrlV), mkU8(29)),
27664 mkU8(29));
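   /* Each 32-bit control lane is reduced to an index 0..7; e.g. a lane
      holding 13 becomes 5, steering data lane 5 into that result lane. */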
27665 /* And use the resulting cleaned-up control vector as steering
27666 in a Perm operation. */
27667 IRTemp res = newTemp(Ity_V256);
27668 assign(res, binop(Iop_Perm32x8, mkexpr(dataV), cv1));
27669 return res;
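/* Handles the BMI2 SARX/SHLX/SHRX group: the shift amount comes from the
   VEX.vvvv register and is masked to the operand width (amt & 31 or
   amt & 63), and, unlike the legacy shifts, rflags are left unchanged. */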
27672 static Long dis_SHIFTX ( /*OUT*/Bool* uses_vvvv,
27673 const VexAbiInfo* vbi, Prefix pfx, Long delta,
27674 const HChar* opname, IROp op8 )
27676 HChar dis_buf[50];
27677 Int alen;
27678 Int size = getRexW(pfx) ? 8 : 4;
27679 IRType ty = szToITy(size);
27680 IRTemp src = newTemp(ty);
27681 IRTemp amt = newTemp(ty);
27682 UChar rm = getUChar(delta);
27684 assign( amt, getIRegV(size,pfx) );
27685 if (epartIsReg(rm)) {
27686 assign( src, getIRegE(size,pfx,rm) );
27687 DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx),
27688 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
27689 delta++;
27690 } else {
27691 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27692 assign( src, loadLE(ty, mkexpr(addr)) );
27693 DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx), dis_buf,
27694 nameIRegG(size,pfx,rm));
27695 delta += alen;
27698 putIRegG( size, pfx, rm,
27699 binop(mkSizedOp(ty,op8), mkexpr(src),
27700 narrowTo(Ity_I8, binop(mkSizedOp(ty,Iop_And8), mkexpr(amt),
27701 mkU(ty,8*size-1)))) );
27702 /* Flags aren't modified. */
27703 *uses_vvvv = True;
27704 return delta;
27708 static Long dis_FMA ( const VexAbiInfo* vbi, Prefix pfx, Long delta, UChar opc )
27710 UChar modrm = getUChar(delta);
27711 UInt rG = gregOfRexRM(pfx, modrm);
27712 UInt rV = getVexNvvvv(pfx);
27713 Bool scalar = (opc & 0xF) > 7 && (opc & 1);
27714 IRType ty = getRexW(pfx) ? Ity_F64 : Ity_F32;
27715 IRType vty = scalar ? ty : (getVexL(pfx) ? Ity_V256 : Ity_V128);
27716 IRTemp addr = IRTemp_INVALID;
27717 HChar dis_buf[50];
27718 Int alen = 0;
27719 const HChar *name;
27720 const HChar *suffix;
27721 const HChar *order;
27722 Bool negateRes = False;
27723 Bool negateZeven = False;
27724 Bool negateZodd = False;
27725 UInt count = 0;
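   /* The low nibble of the opcode selects the operation (fmaddsub, fmsubadd,
      fmadd, fmsub and their negated forms), with the odd values above 7
      being the scalar variants; the high nibble (0x90/0xA0/0xB0) selects the
      132/213/231 operand ordering. */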
27727 switch (opc & 0xF) {
27728 case 0x6: name = "addsub"; negateZeven = True; break;
27729 case 0x7: name = "subadd"; negateZodd = True; break;
27730 case 0x8:
27731 case 0x9: name = "add"; break;
27732 case 0xA:
27733 case 0xB: name = "sub"; negateZeven = True; negateZodd = True;
27734 break;
27735 case 0xC:
27736 case 0xD: name = "add"; negateRes = True; negateZeven = True;
27737 negateZodd = True; break;
27738 case 0xE:
27739 case 0xF: name = "sub"; negateRes = True; break;
27740 default: vpanic("dis_FMA(amd64)"); break;
27742 switch (opc & 0xF0) {
27743 case 0x90: order = "132"; break;
27744 case 0xA0: order = "213"; break;
27745 case 0xB0: order = "231"; break;
27746 default: vpanic("dis_FMA(amd64)"); break;
27748 if (scalar) {
27749 suffix = ty == Ity_F64 ? "sd" : "ss";
27750 } else {
27751 suffix = ty == Ity_F64 ? "pd" : "ps";
27754 // Figure out |count| (the number of elements) by considering |vty| and |ty|.
27755 count = sizeofIRType(vty) / sizeofIRType(ty);
27756 vassert(count == 1 || count == 2 || count == 4 || count == 8);
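   // For example VFMADD231PS on ymm registers gives vty == Ity_V256 and
   // ty == Ity_F32, hence count == 8; the scalar sd forms give count == 1.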
27758 // Fetch operands into the first |count| elements of |sX|, |sY| and |sZ|.
27759 UInt i;
27760 IRExpr *sX[8], *sY[8], *sZ[8], *res[8];
27761 for (i = 0; i < 8; i++) sX[i] = sY[i] = sZ[i] = res[i] = NULL;
27763 IRExpr* (*getYMMRegLane)(UInt,Int)
27764 = ty == Ity_F32 ? getYMMRegLane32F : getYMMRegLane64F;
27765 void (*putYMMRegLane)(UInt,Int,IRExpr*)
27766 = ty == Ity_F32 ? putYMMRegLane32F : putYMMRegLane64F;
27768 for (i = 0; i < count; i++) {
27769 sX[i] = getYMMRegLane(rG, i);
27770 sZ[i] = getYMMRegLane(rV, i);
27773 if (epartIsReg(modrm)) {
27774 UInt rE = eregOfRexRM(pfx, modrm);
27775 delta += 1;
27776 for (i = 0; i < count; i++) {
27777 sY[i] = getYMMRegLane(rE, i);
27779 if (vty == Ity_V256) {
27780 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27781 name, order, suffix, nameYMMReg(rE), nameYMMReg(rV),
27782 nameYMMReg(rG));
27783 } else {
27784 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27785 name, order, suffix, nameXMMReg(rE), nameXMMReg(rV),
27786 nameXMMReg(rG));
27788 } else {
27789 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27790 delta += alen;
27791 for (i = 0; i < count; i++) {
27792 sY[i] = loadLE(ty, binop(Iop_Add64, mkexpr(addr),
27793 mkU64(i * sizeofIRType(ty))));
27795 if (vty == Ity_V256) {
27796 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27797 name, order, suffix, dis_buf, nameYMMReg(rV),
27798 nameYMMReg(rG));
27799 } else {
27800 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27801 name, order, suffix, dis_buf, nameXMMReg(rV),
27802 nameXMMReg(rG));
27806 /* sX/sY/sZ are now in 132 order. If the instruction requires a different
27807 order, swap them around. */
27809 # define COPY_ARR(_dst, _src) \
27810 do { for (int j = 0; j < 8; j++) { _dst[j] = _src[j]; } } while (0)
27812 if ((opc & 0xF0) != 0x90) {
27813 IRExpr* temp[8];
27814 COPY_ARR(temp, sX);
27815 if ((opc & 0xF0) == 0xA0) {
27816 COPY_ARR(sX, sZ);
27817 COPY_ARR(sZ, sY);
27818 COPY_ARR(sY, temp);
27819 } else {
27820 COPY_ARR(sX, sZ);
27821 COPY_ARR(sZ, temp);
27825 # undef COPY_ARR
27827 for (i = 0; i < count; i++) {
27828 IROp opNEG = ty == Ity_F64 ? Iop_NegF64 : Iop_NegF32;
27829 if ((i & 1) ? negateZodd : negateZeven) {
27830 sZ[i] = unop(opNEG, sZ[i]);
27832 res[i] = IRExpr_Qop(ty == Ity_F64 ? Iop_MAddF64 : Iop_MAddF32,
27833 get_FAKE_roundingmode(), sX[i], sY[i], sZ[i]);
27834 if (negateRes) {
27835 res[i] = unop(opNEG, res[i]);
27839 for (i = 0; i < count; i++) {
27840 putYMMRegLane(rG, i, res[i]);
27843 switch (vty) {
27844 case Ity_F32: putYMMRegLane32(rG, 1, mkU32(0)); /*fallthru*/
27845 case Ity_F64: putYMMRegLane64(rG, 1, mkU64(0)); /*fallthru*/
27846 case Ity_V128: putYMMRegLane128(rG, 1, mkV128(0)); /*fallthru*/
27847 case Ity_V256: break;
27848 default: vassert(0);
27851 return delta;
27855 /* Masked load or masked store. */
27856 static ULong dis_VMASKMOV ( Bool *uses_vvvv, const VexAbiInfo* vbi,
27857 Prefix pfx, Long delta,
27858 const HChar* opname, Bool isYMM, IRType ty,
27859 Bool isLoad )
27861 HChar dis_buf[50];
27862 Int alen, i;
27863 IRTemp addr;
27864 UChar modrm = getUChar(delta);
27865 UInt rG = gregOfRexRM(pfx,modrm);
27866 UInt rV = getVexNvvvv(pfx);
27868 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27869 delta += alen;
27871 /**/ if (isLoad && isYMM) {
27872 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
27874 else if (isLoad && !isYMM) {
27875 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
27878 else if (!isLoad && isYMM) {
27879 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rG), nameYMMReg(rV), dis_buf );
27881 else {
27882 vassert(!isLoad && !isYMM);
27883 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rG), nameXMMReg(rV), dis_buf );
27886 vassert(ty == Ity_I32 || ty == Ity_I64);
27887 Bool laneIs32 = ty == Ity_I32;
27889 Int nLanes = (isYMM ? 2 : 1) * (laneIs32 ? 4 : 2);
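   /* Each lane is transferred only if the MSB of the corresponding mask lane
      in rV is set; IRStmt_LoadG / IRStmt_StoreG make the access conditional
      without introducing control flow. */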
27891 for (i = 0; i < nLanes; i++) {
27892 IRExpr* shAmt = laneIs32 ? mkU8(31) : mkU8(63);
27893 IRExpr* one = laneIs32 ? mkU32(1) : mkU64(1);
27894 IROp opSHR = laneIs32 ? Iop_Shr32 : Iop_Shr64;
27895 IROp opEQ = laneIs32 ? Iop_CmpEQ32 : Iop_CmpEQ64;
27896 IRExpr* lane = (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rV, i );
27898 IRTemp cond = newTemp(Ity_I1);
27899 assign(cond, binop(opEQ, binop(opSHR, lane, shAmt), one));
27901 IRTemp data = newTemp(ty);
27902 IRExpr* ea = binop(Iop_Add64, mkexpr(addr),
27903 mkU64(i * (laneIs32 ? 4 : 8)));
27904 if (isLoad) {
27905 stmt(
27906 IRStmt_LoadG(
27907 Iend_LE, laneIs32 ? ILGop_Ident32 : ILGop_Ident64,
27908 data, ea, laneIs32 ? mkU32(0) : mkU64(0), mkexpr(cond)
27910 (laneIs32 ? putYMMRegLane32 : putYMMRegLane64)( rG, i, mkexpr(data) );
27911 } else {
27912 assign(data, (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rG, i ));
27913 stmt( IRStmt_StoreG(Iend_LE, ea, mkexpr(data), mkexpr(cond)) );
27917 if (isLoad && !isYMM)
27918 putYMMRegLane128( rG, 1, mkV128(0) );
27920 *uses_vvvv = True;
27921 return delta;
27925 /* Gather. */
27926 static ULong dis_VGATHER ( Bool *uses_vvvv, const VexAbiInfo* vbi,
27927 Prefix pfx, Long delta,
27928 const HChar* opname, Bool isYMM,
27929 Bool isVM64x, IRType ty )
27931 HChar dis_buf[50];
27932 Int alen, i, vscale, count1, count2;
27933 IRTemp addr;
27934 UChar modrm = getUChar(delta);
27935 UInt rG = gregOfRexRM(pfx,modrm);
27936 UInt rV = getVexNvvvv(pfx);
27937 UInt rI;
27938 IRType dstTy = (isYMM && (ty == Ity_I64 || !isVM64x)) ? Ity_V256 : Ity_V128;
27939 IRType idxTy = (isYMM && (ty == Ity_I32 || isVM64x)) ? Ity_V256 : Ity_V128;
27940 IRTemp cond;
27941 addr = disAVSIBMode ( &alen, vbi, pfx, delta, dis_buf, &rI,
27942 idxTy, &vscale );
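   /* rG, rV and rI must be pairwise distinct (the architecture treats an
      overlap as invalid), and the AVSIB decode must have succeeded; bail out
      to a decode failure if not. */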
27943 if (addr == IRTemp_INVALID || rI == rG || rI == rV || rG == rV)
27944 return delta;
27945 if (dstTy == Ity_V256) {
27946 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rV), dis_buf, nameYMMReg(rG) );
27947 } else {
27948 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rV), dis_buf, nameXMMReg(rG) );
27950 delta += alen;
27952 if (ty == Ity_I32) {
27953 count1 = isYMM ? 8 : 4;
27954 count2 = isVM64x ? count1 / 2 : count1;
27955 } else {
27956 count1 = count2 = isYMM ? 4 : 2;
27959 /* First update the mask register to copies of the sign bit. */
27960 if (ty == Ity_I32) {
27961 if (isYMM)
27962 putYMMReg( rV, binop(Iop_SarN32x8, getYMMReg( rV ), mkU8(31)) );
27963 else
27964 putYMMRegLoAndZU( rV, binop(Iop_SarN32x4, getXMMReg( rV ), mkU8(31)) );
27965 } else {
27966 for (i = 0; i < count1; i++) {
27967 putYMMRegLane64( rV, i, binop(Iop_Sar64, getYMMRegLane64( rV, i ),
27968 mkU8(63)) );
27972 /* Next gather the individual elements. If any fault occurs, the
27973 corresponding mask element will be set and the loop stops. */
27974 for (i = 0; i < count2; i++) {
27975 IRExpr *expr, *addr_expr;
27976 cond = newTemp(Ity_I1);
27977 assign( cond,
27978 binop(ty == Ity_I32 ? Iop_CmpLT32S : Iop_CmpLT64S,
27979 ty == Ity_I32 ? getYMMRegLane32( rV, i )
27980 : getYMMRegLane64( rV, i ),
27981 mkU(ty, 0)) );
27982 expr = ty == Ity_I32 ? getYMMRegLane32( rG, i )
27983 : getYMMRegLane64( rG, i );
27984 addr_expr = isVM64x ? getYMMRegLane64( rI, i )
27985 : unop(Iop_32Sto64, getYMMRegLane32( rI, i ));
27986 switch (vscale) {
27987 case 2: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(1)); break;
27988 case 4: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(2)); break;
27989 case 8: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(3)); break;
27990 default: break;
27992 addr_expr = binop(Iop_Add64, mkexpr(addr), addr_expr);
27993 addr_expr = handleAddrOverrides(vbi, pfx, addr_expr);
27994 addr_expr = IRExpr_ITE(mkexpr(cond), addr_expr, getIReg64(R_RSP));
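      /* For lanes whose mask bit is clear the load still happens, but from
         RSP, which is assumed to be safely addressable; the loaded value is
         then discarded by the ITE on the next line. */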
27995 expr = IRExpr_ITE(mkexpr(cond), loadLE(ty, addr_expr), expr);
27996 if (ty == Ity_I32) {
27997 putYMMRegLane32( rG, i, expr );
27998 putYMMRegLane32( rV, i, mkU32(0) );
27999 } else {
28000 putYMMRegLane64( rG, i, expr);
28001 putYMMRegLane64( rV, i, mkU64(0) );
28005 if (!isYMM || (ty == Ity_I32 && isVM64x)) {
28006 if (ty == Ity_I64 || isYMM)
28007 putYMMRegLane128( rV, 1, mkV128(0) );
28008 else if (ty == Ity_I32 && count2 == 2) {
28009 putYMMRegLane64( rV, 1, mkU64(0) );
28010 putYMMRegLane64( rG, 1, mkU64(0) );
28012 putYMMRegLane128( rG, 1, mkV128(0) );
28015 *uses_vvvv = True;
28016 return delta;
28020 __attribute__((noinline))
28021 static
28022 Long dis_ESC_0F38__VEX (
28023 /*MB_OUT*/DisResult* dres,
28024 /*OUT*/ Bool* uses_vvvv,
28025 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
28026 Bool resteerCisOk,
28027 void* callback_opaque,
28028 const VexArchInfo* archinfo,
28029 const VexAbiInfo* vbi,
28030 Prefix pfx, Int sz, Long deltaIN
28033 IRTemp addr = IRTemp_INVALID;
28034 Int alen = 0;
28035 HChar dis_buf[50];
28036 Long delta = deltaIN;
28037 UChar opc = getUChar(delta);
28038 delta++;
28039 *uses_vvvv = False;
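   /* As in the preceding 0F VEX decoder, falling out of the switch returns
      deltaIN unchanged (decode failure), while a successful decode jumps to
      decode_success and returns the advanced delta. */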
28041 switch (opc) {
28043 case 0x00:
28044 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
28045 /* VPSHUFB = VEX.NDS.128.66.0F38.WIG 00 /r */
28046 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28047 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28048 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_XMM );
28049 goto decode_success;
28051 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
28052 /* VPSHUFB = VEX.NDS.256.66.0F38.WIG 00 /r */
28053 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28054 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28055 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_YMM );
28056 goto decode_success;
28058 break;
28060 case 0x01:
28061 case 0x02:
28062 case 0x03:
28063 /* VPHADDW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 01 /r */
28064 /* VPHADDD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 02 /r */
28065 /* VPHADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 03 /r */
28066 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28067 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
28068 *uses_vvvv = True;
28069 goto decode_success;
28071 /* VPHADDW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 01 /r */
28072 /* VPHADDD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 02 /r */
28073 /* VPHADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 03 /r */
28074 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28075 delta = dis_PHADD_256( vbi, pfx, delta, opc );
28076 *uses_vvvv = True;
28077 goto decode_success;
28079 break;
28081 case 0x04:
28082 /* VPMADDUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 04 /r */
28083 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28084 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28085 uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
28086 math_PMADDUBSW_128 );
28087 goto decode_success;
28089 /* VPMADDUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 04 /r */
28090 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28091 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28092 uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
28093 math_PMADDUBSW_256 );
28094 goto decode_success;
28096 break;
28098 case 0x05:
28099 case 0x06:
28100 case 0x07:
28101 /* VPHSUBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 05 /r */
28102 /* VPHSUBD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 06 /r */
28103 /* VPHSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 07 /r */
28104 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28105 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
28106 *uses_vvvv = True;
28107 goto decode_success;
28109 /* VPHSUBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 05 /r */
28110 /* VPHSUBD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 06 /r */
28111 /* VPHSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 07 /r */
28112 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28113 delta = dis_PHADD_256( vbi, pfx, delta, opc );
28114 *uses_vvvv = True;
28115 goto decode_success;
28117 break;
28119 case 0x08:
28120 case 0x09:
28121 case 0x0A:
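/* The three opcodes share one implementation: laneszB selects byte, word or
   dword lanes, and the per-lane sign/zero/negate work is done by
   dis_PSIGN_helper on 64-bit halves (128-bit form) or quarters (256-bit
   form). */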
28122 /* VPSIGNB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 08 /r */
28123 /* VPSIGNW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 09 /r */
28124 /* VPSIGND xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0A /r */
28125 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28126 IRTemp sV = newTemp(Ity_V128);
28127 IRTemp dV = newTemp(Ity_V128);
28128 IRTemp sHi, sLo, dHi, dLo;
28129 sHi = sLo = dHi = dLo = IRTemp_INVALID;
28130 HChar ch = '?';
28131 Int laneszB = 0;
28132 UChar modrm = getUChar(delta);
28133 UInt rG = gregOfRexRM(pfx,modrm);
28134 UInt rV = getVexNvvvv(pfx);
28136 switch (opc) {
28137 case 0x08: laneszB = 1; ch = 'b'; break;
28138 case 0x09: laneszB = 2; ch = 'w'; break;
28139 case 0x0A: laneszB = 4; ch = 'd'; break;
28140 default: vassert(0);
28143 assign( dV, getXMMReg(rV) );
28145 if (epartIsReg(modrm)) {
28146 UInt rE = eregOfRexRM(pfx,modrm);
28147 assign( sV, getXMMReg(rE) );
28148 delta += 1;
28149 DIP("vpsign%c %s,%s,%s\n", ch, nameXMMReg(rE),
28150 nameXMMReg(rV), nameXMMReg(rG));
28151 } else {
28152 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28153 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
28154 delta += alen;
28155 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
28156 nameXMMReg(rV), nameXMMReg(rG));
28159 breakupV128to64s( dV, &dHi, &dLo );
28160 breakupV128to64s( sV, &sHi, &sLo );
28162 putYMMRegLoAndZU(
28164 binop(Iop_64HLtoV128,
28165 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
28166 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
28169 *uses_vvvv = True;
28170 goto decode_success;
28172 /* VPSIGNB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 08 /r */
28173 /* VPSIGNW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 09 /r */
28174 /* VPSIGND ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0A /r */
28175 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28176 IRTemp sV = newTemp(Ity_V256);
28177 IRTemp dV = newTemp(Ity_V256);
28178 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
28179 s3 = s2 = s1 = s0 = IRTemp_INVALID;
28180 d3 = d2 = d1 = d0 = IRTemp_INVALID;
28181 UChar ch = '?';
28182 Int laneszB = 0;
28183 UChar modrm = getUChar(delta);
28184 UInt rG = gregOfRexRM(pfx,modrm);
28185 UInt rV = getVexNvvvv(pfx);
28187 switch (opc) {
28188 case 0x08: laneszB = 1; ch = 'b'; break;
28189 case 0x09: laneszB = 2; ch = 'w'; break;
28190 case 0x0A: laneszB = 4; ch = 'd'; break;
28191 default: vassert(0);
28194 assign( dV, getYMMReg(rV) );
28196 if (epartIsReg(modrm)) {
28197 UInt rE = eregOfRexRM(pfx,modrm);
28198 assign( sV, getYMMReg(rE) );
28199 delta += 1;
28200 DIP("vpsign%c %s,%s,%s\n", ch, nameYMMReg(rE),
28201 nameYMMReg(rV), nameYMMReg(rG));
28202 } else {
28203 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28204 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
28205 delta += alen;
28206 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
28207 nameYMMReg(rV), nameYMMReg(rG));
28210 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
28211 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
28213 putYMMReg(
28215 binop( Iop_V128HLtoV256,
28216 binop(Iop_64HLtoV128,
28217 dis_PSIGN_helper( mkexpr(s3), mkexpr(d3), laneszB ),
28218 dis_PSIGN_helper( mkexpr(s2), mkexpr(d2), laneszB )
28220 binop(Iop_64HLtoV128,
28221 dis_PSIGN_helper( mkexpr(s1), mkexpr(d1), laneszB ),
28222 dis_PSIGN_helper( mkexpr(s0), mkexpr(d0), laneszB )
28226 *uses_vvvv = True;
28227 goto decode_success;
28229 break;
28231 case 0x0B:
28232 /* VPMULHRSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0B /r */
28233 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28234 IRTemp sV = newTemp(Ity_V128);
28235 IRTemp dV = newTemp(Ity_V128);
28236 IRTemp sHi, sLo, dHi, dLo;
28237 sHi = sLo = dHi = dLo = IRTemp_INVALID;
28238 UChar modrm = getUChar(delta);
28239 UInt rG = gregOfRexRM(pfx,modrm);
28240 UInt rV = getVexNvvvv(pfx);
28242 assign( dV, getXMMReg(rV) );
28244 if (epartIsReg(modrm)) {
28245 UInt rE = eregOfRexRM(pfx,modrm);
28246 assign( sV, getXMMReg(rE) );
28247 delta += 1;
28248 DIP("vpmulhrsw %s,%s,%s\n", nameXMMReg(rE),
28249 nameXMMReg(rV), nameXMMReg(rG));
28250 } else {
28251 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28252 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
28253 delta += alen;
28254 DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
28255 nameXMMReg(rV), nameXMMReg(rG));
28258 breakupV128to64s( dV, &dHi, &dLo );
28259 breakupV128to64s( sV, &sHi, &sLo );
28261 putYMMRegLoAndZU(
28263 binop(Iop_64HLtoV128,
28264 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
28265 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
28268 *uses_vvvv = True;
28269 goto decode_success;
28271 /* VPMULHRSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0B /r */
28272 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28273 IRTemp sV = newTemp(Ity_V256);
28274 IRTemp dV = newTemp(Ity_V256);
28275 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
28276 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
28277 UChar modrm = getUChar(delta);
28278 UInt rG = gregOfRexRM(pfx,modrm);
28279 UInt rV = getVexNvvvv(pfx);
28281 assign( dV, getYMMReg(rV) );
28283 if (epartIsReg(modrm)) {
28284 UInt rE = eregOfRexRM(pfx,modrm);
28285 assign( sV, getYMMReg(rE) );
28286 delta += 1;
28287 DIP("vpmulhrsw %s,%s,%s\n", nameYMMReg(rE),
28288 nameYMMReg(rV), nameYMMReg(rG));
28289 } else {
28290 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28291 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
28292 delta += alen;
28293 DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
28294 nameYMMReg(rV), nameYMMReg(rG));
28297 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
28298 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
28300 putYMMReg(
28302 binop(Iop_V128HLtoV256,
28303 binop(Iop_64HLtoV128,
28304 dis_PMULHRSW_helper( mkexpr(s3), mkexpr(d3) ),
28305 dis_PMULHRSW_helper( mkexpr(s2), mkexpr(d2) ) ),
28306 binop(Iop_64HLtoV128,
28307 dis_PMULHRSW_helper( mkexpr(s1), mkexpr(d1) ),
28308 dis_PMULHRSW_helper( mkexpr(s0), mkexpr(d0) ) )
28311 *uses_vvvv = True;
28313 goto decode_success;
28315 break;
28317 case 0x0C:
28318 /* VPERMILPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0C /r */
28319 if (have66noF2noF3(pfx)
28320 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
28321 UChar modrm = getUChar(delta);
28322 UInt rG = gregOfRexRM(pfx, modrm);
28323 UInt rV = getVexNvvvv(pfx);
28324 IRTemp ctrlV = newTemp(Ity_V128);
28325 if (epartIsReg(modrm)) {
28326 UInt rE = eregOfRexRM(pfx, modrm);
28327 delta += 1;
28328 DIP("vpermilps %s,%s,%s\n",
28329 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
28330 assign(ctrlV, getXMMReg(rE));
28331 } else {
28332 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28333 delta += alen;
28334 DIP("vpermilps %s,%s,%s\n",
28335 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
28336 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
28338 IRTemp dataV = newTemp(Ity_V128);
28339 assign(dataV, getXMMReg(rV));
28340 IRTemp resV = math_PERMILPS_VAR_128(dataV, ctrlV);
28341 putYMMRegLoAndZU(rG, mkexpr(resV));
28342 *uses_vvvv = True;
28343 goto decode_success;
28345 /* VPERMILPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0C /r */
28346 if (have66noF2noF3(pfx)
28347 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
28348 UChar modrm = getUChar(delta);
28349 UInt rG = gregOfRexRM(pfx, modrm);
28350 UInt rV = getVexNvvvv(pfx);
28351 IRTemp ctrlV = newTemp(Ity_V256);
28352 if (epartIsReg(modrm)) {
28353 UInt rE = eregOfRexRM(pfx, modrm);
28354 delta += 1;
28355 DIP("vpermilps %s,%s,%s\n",
28356 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
28357 assign(ctrlV, getYMMReg(rE));
28358 } else {
28359 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28360 delta += alen;
28361 DIP("vpermilps %s,%s,%s\n",
28362 dis_buf, nameYMMReg(rV), nameYMMReg(rG));
28363 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
28365 IRTemp dataV = newTemp(Ity_V256);
28366 assign(dataV, getYMMReg(rV));
28367 IRTemp resV = math_PERMILPS_VAR_256(dataV, ctrlV);
28368 putYMMReg(rG, mkexpr(resV));
28369 *uses_vvvv = True;
28370 goto decode_success;
28372 break;
28374 case 0x0D:
28375 /* VPERMILPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0D /r */
28376 if (have66noF2noF3(pfx)
28377 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
28378 UChar modrm = getUChar(delta);
28379 UInt rG = gregOfRexRM(pfx, modrm);
28380 UInt rV = getVexNvvvv(pfx);
28381 IRTemp ctrlV = newTemp(Ity_V128);
28382 if (epartIsReg(modrm)) {
28383 UInt rE = eregOfRexRM(pfx, modrm);
28384 delta += 1;
28385 DIP("vpermilpd %s,%s,%s\n",
28386 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
28387 assign(ctrlV, getXMMReg(rE));
28388 } else {
28389 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28390 delta += alen;
28391 DIP("vpermilpd %s,%s,%s\n",
28392 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
28393 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
28395 IRTemp dataV = newTemp(Ity_V128);
28396 assign(dataV, getXMMReg(rV));
28397 IRTemp resV = math_PERMILPD_VAR_128(dataV, ctrlV);
28398 putYMMRegLoAndZU(rG, mkexpr(resV));
28399 *uses_vvvv = True;
28400 goto decode_success;
28402 /* VPERMILPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0D /r */
28403 if (have66noF2noF3(pfx)
28404 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
28405 UChar modrm = getUChar(delta);
28406 UInt rG = gregOfRexRM(pfx, modrm);
28407 UInt rV = getVexNvvvv(pfx);
28408 IRTemp ctrlV = newTemp(Ity_V256);
28409 if (epartIsReg(modrm)) {
28410 UInt rE = eregOfRexRM(pfx, modrm);
28411 delta += 1;
28412 DIP("vpermilpd %s,%s,%s\n",
28413 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
28414 assign(ctrlV, getYMMReg(rE));
28415 } else {
28416 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28417 delta += alen;
28418 DIP("vpermilpd %s,%s,%s\n",
28419 dis_buf, nameYMMReg(rV), nameYMMReg(rG));
28420 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
28422 IRTemp dataV = newTemp(Ity_V256);
28423 assign(dataV, getYMMReg(rV));
28424 IRTemp resV = math_PERMILPD_VAR_256(dataV, ctrlV);
28425 putYMMReg(rG, mkexpr(resV));
28426 *uses_vvvv = True;
28427 goto decode_success;
28429 break;
28431 case 0x0E:
28432 /* VTESTPS xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0E /r */
28433 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28434 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 32 );
28435 goto decode_success;
28437 /* VTESTPS ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0E /r */
28438 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28439 delta = dis_xTESTy_256( vbi, pfx, delta, 32 );
28440 goto decode_success;
28442 break;
28444 case 0x0F:
28445 /* VTESTPD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0F /r */
28446 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28447 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 64 );
28448 goto decode_success;
28450 /* VTESTPD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0F /r */
28451 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28452 delta = dis_xTESTy_256( vbi, pfx, delta, 64 );
28453 goto decode_success;
28455 break;
28457 case 0x16:
28458 /* VPERMPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 16 /r */
28459 if (have66noF2noF3(pfx)
28460 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
28461 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28462 uses_vvvv, vbi, pfx, delta, "vpermps", math_VPERMD );
28463 goto decode_success;
28465 break;
28467 case 0x17:
28468 /* VPTEST xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 17 /r */
28469 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28470 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 0 );
28471 goto decode_success;
28473 /* VPTEST ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 17 /r */
28474 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28475 delta = dis_xTESTy_256( vbi, pfx, delta, 0 );
28476 goto decode_success;
28478 break;
28480 case 0x18:
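/* VBROADCASTSS comes in four forms (m32->xmm, m32->ymm, xmm->xmm,
   xmm->ymm); all of them replicate a single 32-bit value, built up by
   pairing it with Iop_32HLto64 and then widening with Iop_64HLtoV128 or
   Iop_64x4toV256. */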
28481 /* VBROADCASTSS m32, xmm1 = VEX.128.66.0F38.WIG 18 /r */
28482 if (have66noF2noF3(pfx)
28483 && 0==getVexL(pfx)/*128*/
28484 && !epartIsReg(getUChar(delta))) {
28485 UChar modrm = getUChar(delta);
28486 UInt rG = gregOfRexRM(pfx, modrm);
28487 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28488 delta += alen;
28489 DIP("vbroadcastss %s,%s\n", dis_buf, nameXMMReg(rG));
28490 IRTemp t32 = newTemp(Ity_I32);
28491 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
28492 IRTemp t64 = newTemp(Ity_I64);
28493 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28494 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
28495 putYMMRegLoAndZU(rG, res);
28496 goto decode_success;
28498 /* VBROADCASTSS m32, ymm1 = VEX.256.66.0F38.WIG 18 /r */
28499 if (have66noF2noF3(pfx)
28500 && 1==getVexL(pfx)/*256*/
28501 && !epartIsReg(getUChar(delta))) {
28502 UChar modrm = getUChar(delta);
28503 UInt rG = gregOfRexRM(pfx, modrm);
28504 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28505 delta += alen;
28506 DIP("vbroadcastss %s,%s\n", dis_buf, nameYMMReg(rG));
28507 IRTemp t32 = newTemp(Ity_I32);
28508 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
28509 IRTemp t64 = newTemp(Ity_I64);
28510 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28511 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28512 mkexpr(t64), mkexpr(t64));
28513 putYMMReg(rG, res);
28514 goto decode_success;
28516 /* VBROADCASTSS xmm2, xmm1 = VEX.128.66.0F38.WIG 18 /r */
28517 if (have66noF2noF3(pfx)
28518 && 0==getVexL(pfx)/*128*/
28519 && epartIsReg(getUChar(delta))) {
28520 UChar modrm = getUChar(delta);
28521 UInt rG = gregOfRexRM(pfx, modrm);
28522 UInt rE = eregOfRexRM(pfx, modrm);
28523 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
28524 IRTemp t32 = newTemp(Ity_I32);
28525 assign(t32, getXMMRegLane32(rE, 0));
28526 IRTemp t64 = newTemp(Ity_I64);
28527 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28528 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
28529 putYMMRegLoAndZU(rG, res);
28530 delta++;
28531 goto decode_success;
28533 /* VBROADCASTSS xmm2, ymm1 = VEX.256.66.0F38.WIG 18 /r */
28534 if (have66noF2noF3(pfx)
28535 && 1==getVexL(pfx)/*256*/
28536 && epartIsReg(getUChar(delta))) {
28537 UChar modrm = getUChar(delta);
28538 UInt rG = gregOfRexRM(pfx, modrm);
28539 UInt rE = eregOfRexRM(pfx, modrm);
28540 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
28541 IRTemp t32 = newTemp(Ity_I32);
28542 assign(t32, getXMMRegLane32(rE, 0));
28543 IRTemp t64 = newTemp(Ity_I64);
28544 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28545 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28546 mkexpr(t64), mkexpr(t64));
28547 putYMMReg(rG, res);
28548 delta++;
28549 goto decode_success;
28551 break;
28553 case 0x19:
28554 /* VBROADCASTSD m64, ymm1 = VEX.256.66.0F38.WIG 19 /r */
28555 if (have66noF2noF3(pfx)
28556 && 1==getVexL(pfx)/*256*/
28557 && !epartIsReg(getUChar(delta))) {
28558 UChar modrm = getUChar(delta);
28559 UInt rG = gregOfRexRM(pfx, modrm);
28560 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28561 delta += alen;
28562 DIP("vbroadcastsd %s,%s\n", dis_buf, nameYMMReg(rG));
28563 IRTemp t64 = newTemp(Ity_I64);
28564 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
28565 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28566 mkexpr(t64), mkexpr(t64));
28567 putYMMReg(rG, res);
28568 goto decode_success;
28570 /* VBROADCASTSD xmm2, ymm1 = VEX.256.66.0F38.WIG 19 /r */
28571 if (have66noF2noF3(pfx)
28572 && 1==getVexL(pfx)/*256*/
28573 && epartIsReg(getUChar(delta))) {
28574 UChar modrm = getUChar(delta);
28575 UInt rG = gregOfRexRM(pfx, modrm);
28576 UInt rE = eregOfRexRM(pfx, modrm);
28577 DIP("vbroadcastsd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
28578 IRTemp t64 = newTemp(Ity_I64);
28579 assign(t64, getXMMRegLane64(rE, 0));
28580 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28581 mkexpr(t64), mkexpr(t64));
28582 putYMMReg(rG, res);
28583 delta++;
28584 goto decode_success;
28586 break;
28588 case 0x1A:
28589 /* VBROADCASTF128 m128, ymm1 = VEX.256.66.0F38.WIG 1A /r */
28590 if (have66noF2noF3(pfx)
28591 && 1==getVexL(pfx)/*256*/
28592 && !epartIsReg(getUChar(delta))) {
28593 UChar modrm = getUChar(delta);
28594 UInt rG = gregOfRexRM(pfx, modrm);
28595 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28596 delta += alen;
28597 DIP("vbroadcastf128 %s,%s\n", dis_buf, nameYMMReg(rG));
28598 IRTemp t128 = newTemp(Ity_V128);
28599 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
28600 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
28601 goto decode_success;
28603 break;
28605 case 0x1C:
28606 /* VPABSB xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1C /r */
28607 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28608 delta = dis_AVX128_E_to_G_unary(
28609 uses_vvvv, vbi, pfx, delta,
28610 "vpabsb", math_PABS_XMM_pap1 );
28611 goto decode_success;
28613 /* VPABSB ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1C /r */
28614 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28615 delta = dis_AVX256_E_to_G_unary(
28616 uses_vvvv, vbi, pfx, delta,
28617 "vpabsb", math_PABS_YMM_pap1 );
28618 goto decode_success;
28620 break;
28622 case 0x1D:
28623 /* VPABSW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1D /r */
28624 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28625 delta = dis_AVX128_E_to_G_unary(
28626 uses_vvvv, vbi, pfx, delta,
28627 "vpabsw", math_PABS_XMM_pap2 );
28628 goto decode_success;
28630 /* VPABSW ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1D /r */
28631 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28632 delta = dis_AVX256_E_to_G_unary(
28633 uses_vvvv, vbi, pfx, delta,
28634 "vpabsw", math_PABS_YMM_pap2 );
28635 goto decode_success;
28637 break;
28639 case 0x1E:
28640 /* VPABSD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1E /r */
28641 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28642 delta = dis_AVX128_E_to_G_unary(
28643 uses_vvvv, vbi, pfx, delta,
28644 "vpabsd", math_PABS_XMM_pap4 );
28645 goto decode_success;
28647 /* VPABSD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1E /r */
28648 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28649 delta = dis_AVX256_E_to_G_unary(
28650 uses_vvvv, vbi, pfx, delta,
28651 "vpabsd", math_PABS_YMM_pap4 );
28652 goto decode_success;
28654 break;
28656 case 0x20:
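/* Cases 0x20..0x25 are the VPMOVSX family: each widens the low elements of
   the source by sign-extension, with the 256-bit forms consuming twice as
   much source data.  The dis_PMOVxX* helpers are shared with the
   zero-extending VPMOVZX forms via their xIsZ flag; the BQ/WQ cases use
   dedicated sign-extend helpers. */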
28657 /* VPMOVSXBW xmm2/m64, xmm1 */
28658 /* VPMOVSXBW = VEX.128.66.0F38.WIG 20 /r */
28659 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28660 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
28661 True/*isAvx*/, False/*!xIsZ*/ );
28662 goto decode_success;
28664 /* VPMOVSXBW xmm2/m128, ymm1 */
28665 /* VPMOVSXBW = VEX.256.66.0F38.WIG 20 /r */
28666 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28667 delta = dis_PMOVxXBW_256( vbi, pfx, delta, False/*!xIsZ*/ );
28668 goto decode_success;
28670 break;
28672 case 0x21:
28673 /* VPMOVSXBD xmm2/m32, xmm1 */
28674 /* VPMOVSXBD = VEX.128.66.0F38.WIG 21 /r */
28675 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28676 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
28677 True/*isAvx*/, False/*!xIsZ*/ );
28678 goto decode_success;
28680 /* VPMOVSXBD xmm2/m64, ymm1 */
28681 /* VPMOVSXBD = VEX.256.66.0F38.WIG 21 /r */
28682 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28683 delta = dis_PMOVxXBD_256( vbi, pfx, delta, False/*!xIsZ*/ );
28684 goto decode_success;
28686 break;
28688 case 0x22:
28689 /* VPMOVSXBQ xmm2/m16, xmm1 */
28690 /* VPMOVSXBQ = VEX.128.66.0F38.WIG 22 /r */
28691 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28692 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, True/*isAvx*/ );
28693 goto decode_success;
28695 /* VPMOVSXBQ xmm2/m32, ymm1 */
28696 /* VPMOVSXBQ = VEX.256.66.0F38.WIG 22 /r */
28697 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28698 delta = dis_PMOVSXBQ_256( vbi, pfx, delta );
28699 goto decode_success;
28701 break;
28703 case 0x23:
28704 /* VPMOVSXWD xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 23 /r */
28705 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28706 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
28707 True/*isAvx*/, False/*!xIsZ*/ );
28708 goto decode_success;
28710 /* VPMOVSXWD xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 23 /r */
28711 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28712 delta = dis_PMOVxXWD_256( vbi, pfx, delta, False/*!xIsZ*/ );
28713 goto decode_success;
28715 break;
28717 case 0x24:
28718 /* VPMOVSXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 24 /r */
28719 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28720 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, True/*isAvx*/ );
28721 goto decode_success;
28723 /* VPMOVSXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 24 /r */
28724 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28725 delta = dis_PMOVSXWQ_256( vbi, pfx, delta );
28726 goto decode_success;
28728 break;
28730 case 0x25:
28731 /* VPMOVSXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 25 /r */
28732 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28733 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
28734 True/*isAvx*/, False/*!xIsZ*/ );
28735 goto decode_success;
28737 /* VPMOVSXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 25 /r */
28738 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28739 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, False/*!xIsZ*/ );
28740 goto decode_success;
28742 break;
28744 case 0x28:
28745 /* VPMULDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 28 /r */
28746 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28747 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28748 uses_vvvv, vbi, pfx, delta,
28749 "vpmuldq", math_PMULDQ_128 );
28750 goto decode_success;
28752 /* VPMULDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 28 /r */
28753 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28754 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28755 uses_vvvv, vbi, pfx, delta,
28756 "vpmuldq", math_PMULDQ_256 );
28757 goto decode_success;
28759 break;
28761 case 0x29:
28762 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28763 /* VPCMPEQQ = VEX.NDS.128.66.0F38.WIG 29 /r */
28764 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28765 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28766 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x2 );
28767 goto decode_success;
28769 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28770 /* VPCMPEQQ = VEX.NDS.256.66.0F38.WIG 29 /r */
28771 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28772 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28773 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x4 );
28774 goto decode_success;
28776 break;
28778 case 0x2A:
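              /* VMOVNTDQA is a non-temporal (streaming) load.  Architecturally
                 the memory operand must be 16-byte (128-bit form) or 32-byte
                 (256-bit form) aligned, which is why the cases below insert
                 gen_SEGV_if_not_{16,32}_aligned checks before the load. */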
28779 /* VMOVNTDQA m128, xmm1 = VEX.128.66.0F38.WIG 2A /r */
28780 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28781 && !epartIsReg(getUChar(delta))) {
28782 UChar modrm = getUChar(delta);
28783 UInt rD = gregOfRexRM(pfx, modrm);
28784 IRTemp tD = newTemp(Ity_V128);
28785 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28786 delta += alen;
28787 gen_SEGV_if_not_16_aligned(addr);
28788 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
28789 DIP("vmovntdqa %s,%s\n", dis_buf, nameXMMReg(rD));
28790 putYMMRegLoAndZU(rD, mkexpr(tD));
28791 goto decode_success;
28793 /* VMOVNTDQA m256, ymm1 = VEX.256.66.0F38.WIG 2A /r */
28794 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28795 && !epartIsReg(getUChar(delta))) {
28796 UChar modrm = getUChar(delta);
28797 UInt rD = gregOfRexRM(pfx, modrm);
28798 IRTemp tD = newTemp(Ity_V256);
28799 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28800 delta += alen;
28801 gen_SEGV_if_not_32_aligned(addr);
28802 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
28803 DIP("vmovntdqa %s,%s\n", dis_buf, nameYMMReg(rD));
28804 putYMMReg(rD, mkexpr(tD));
28805 goto decode_success;
28807 break;
28809 case 0x2B:
28810 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28811 /* VPACKUSDW = VEX.NDS.128.66.0F38.WIG 2B /r */
28812 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28813 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
28814 uses_vvvv, vbi, pfx, delta, "vpackusdw",
28815 Iop_QNarrowBin32Sto16Ux8, NULL,
28816 False/*!invertLeftArg*/, True/*swapArgs*/ );
28817 goto decode_success;
28819 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28820 /* VPACKUSDW = VEX.NDS.256.66.0F38.WIG 2B /r */
28821 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28822 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28823 uses_vvvv, vbi, pfx, delta, "vpackusdw",
28824 math_VPACKUSDW_YMM );
28825 goto decode_success;
28827 break;
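              /* Cases 0x2C..0x2F are the AVX masked vector loads and stores
                 (VMASKMOVPS/PD); the integer forms VPMASKMOVD/Q appear later at
                 0x8C/0x8E.  For loads, elements whose mask lane has its top bit
                 clear are zeroed in the destination; for stores, such elements
                 leave memory untouched.  The mask comes from the vvvv register
                 and everything is funnelled through dis_VMASKMOV, parameterised
                 on element size, vector width and load/store direction. */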
28829 case 0x2C:
28830 /* VMASKMOVPS m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2C /r */
28831 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28832 && 0==getRexW(pfx)/*W0*/
28833 && !epartIsReg(getUChar(delta))) {
28834 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
28835 /*!isYMM*/False, Ity_I32, /*isLoad*/True );
28836 goto decode_success;
28838 /* VMASKMOVPS m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2C /r */
28839 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28840 && 0==getRexW(pfx)/*W0*/
28841 && !epartIsReg(getUChar(delta))) {
28842 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
28843 /*isYMM*/True, Ity_I32, /*isLoad*/True );
28844 goto decode_success;
28846 break;
28848 case 0x2D:
28849 /* VMASKMOVPD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2D /r */
28850 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28851 && 0==getRexW(pfx)/*W0*/
28852 && !epartIsReg(getUChar(delta))) {
28853 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
28854 /*!isYMM*/False, Ity_I64, /*isLoad*/True );
28855 goto decode_success;
28857 /* VMASKMOVPD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2D /r */
28858 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28859 && 0==getRexW(pfx)/*W0*/
28860 && !epartIsReg(getUChar(delta))) {
28861 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
28862 /*isYMM*/True, Ity_I64, /*isLoad*/True );
28863 goto decode_success;
28865 break;
28867 case 0x2E:
28868 /* VMASKMOVPS xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2E /r */
28869 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28870 && 0==getRexW(pfx)/*W0*/
28871 && !epartIsReg(getUChar(delta))) {
28872 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
28873 /*!isYMM*/False, Ity_I32, /*!isLoad*/False );
28874 goto decode_success;
28876 /* VMASKMOVPS ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2E /r */
28877 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28878 && 0==getRexW(pfx)/*W0*/
28879 && !epartIsReg(getUChar(delta))) {
28880 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
28881 /*isYMM*/True, Ity_I32, /*!isLoad*/False );
28882 goto decode_success;
28884 break;
28886 case 0x2F:
28887 /* VMASKMOVPD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2F /r */
28888 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28889 && 0==getRexW(pfx)/*W0*/
28890 && !epartIsReg(getUChar(delta))) {
28891 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
28892 /*!isYMM*/False, Ity_I64, /*!isLoad*/False );
28893 goto decode_success;
28895 /* VMASKMOVPD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2F /r */
28896 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28897 && 0==getRexW(pfx)/*W0*/
28898 && !epartIsReg(getUChar(delta))) {
28899 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
28900 /*isYMM*/True, Ity_I64, /*!isLoad*/False );
28901 goto decode_success;
28903 break;
28905 case 0x30:
28906 /* VPMOVZXBW xmm2/m64, xmm1 */
28907 /* VPMOVZXBW = VEX.128.66.0F38.WIG 30 /r */
28908 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28909 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
28910 True/*isAvx*/, True/*xIsZ*/ );
28911 goto decode_success;
28913 /* VPMOVZXBW xmm2/m128, ymm1 */
28914 /* VPMOVZXBW = VEX.256.66.0F38.WIG 30 /r */
28915 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28916 delta = dis_PMOVxXBW_256( vbi, pfx, delta, True/*xIsZ*/ );
28917 goto decode_success;
28919 break;
28921 case 0x31:
28922 /* VPMOVZXBD xmm2/m32, xmm1 */
28923 /* VPMOVZXBD = VEX.128.66.0F38.WIG 31 /r */
28924 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28925 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
28926 True/*isAvx*/, True/*xIsZ*/ );
28927 goto decode_success;
28929 /* VPMOVZXBD xmm2/m64, ymm1 */
28930 /* VPMOVZXBD = VEX.256.66.0F38.WIG 31 /r */
28931 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28932 delta = dis_PMOVxXBD_256( vbi, pfx, delta, True/*xIsZ*/ );
28933 goto decode_success;
28935 break;
28937 case 0x32:
28938 /* VPMOVZXBQ xmm2/m16, xmm1 */
28939 /* VPMOVZXBQ = VEX.128.66.0F38.WIG 32 /r */
28940 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28941 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, True/*isAvx*/ );
28942 goto decode_success;
28944 /* VPMOVZXBQ xmm2/m32, ymm1 */
28945 /* VPMOVZXBQ = VEX.256.66.0F38.WIG 32 /r */
28946 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28947 delta = dis_PMOVZXBQ_256( vbi, pfx, delta );
28948 goto decode_success;
28950 break;
28952 case 0x33:
28953 /* VPMOVZXWD xmm2/m64, xmm1 */
28954 /* VPMOVZXWD = VEX.128.66.0F38.WIG 33 /r */
28955 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28956 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
28957 True/*isAvx*/, True/*xIsZ*/ );
28958 goto decode_success;
28960 /* VPMOVZXWD xmm2/m128, ymm1 */
28961 /* VPMOVZXWD = VEX.256.66.0F38.WIG 33 /r */
28962 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28963 delta = dis_PMOVxXWD_256( vbi, pfx, delta, True/*xIsZ*/ );
28964 goto decode_success;
28966 break;
28968 case 0x34:
28969 /* VPMOVZXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 34 /r */
28970 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28971 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, True/*isAvx*/ );
28972 goto decode_success;
28974 /* VPMOVZXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 34 /r */
28975 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28976 delta = dis_PMOVZXWQ_256( vbi, pfx, delta );
28977 goto decode_success;
28979 break;
28981 case 0x35:
28982 /* VPMOVZXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 35 /r */
28983 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28984 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
28985 True/*isAvx*/, True/*xIsZ*/ );
28986 goto decode_success;
28988 /* VPMOVZXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 35 /r */
28989 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28990 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, True/*xIsZ*/ );
28991 goto decode_success;
28993 break;
28995 case 0x36:
28996 /* VPERMD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 36 /r */
28997 if (have66noF2noF3(pfx)
28998 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
28999 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
29000 uses_vvvv, vbi, pfx, delta, "vpermd", math_VPERMD );
29001 goto decode_success;
29003 break;
29005 case 0x37:
29006 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
29007 /* VPCMPGTQ = VEX.NDS.128.66.0F38.WIG 37 /r */
29008 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29009 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29010 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx2 );
29011 goto decode_success;
29013 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
29014 /* VPCMPGTQ = VEX.NDS.256.66.0F38.WIG 37 /r */
29015 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29016 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29017 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx4 );
29018 goto decode_success;
29020 break;
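              /* Cases 0x38..0x3F are the packed signed/unsigned min/max family;
                 each form simply maps onto the matching
                 Iop_{Min,Max}{8,16,32}{S,U}xN at 128 or 256 bits. */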
29022 case 0x38:
29023 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
29024 /* VPMINSB = VEX.NDS.128.66.0F38.WIG 38 /r */
29025 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29026 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29027 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx16 );
29028 goto decode_success;
29030 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
29031 /* VPMINSB = VEX.NDS.256.66.0F38.WIG 38 /r */
29032 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29033 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29034 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx32 );
29035 goto decode_success;
29037 break;
29039 case 0x39:
29040 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
29041 /* VPMINSD = VEX.NDS.128.66.0F38.WIG 39 /r */
29042 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29043 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29044 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx4 );
29045 goto decode_success;
29047 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
29048 /* VPMINSD = VEX.NDS.256.66.0F38.WIG 39 /r */
29049 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29050 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29051 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx8 );
29052 goto decode_success;
29054 break;
29056 case 0x3A:
29057 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
29058 /* VPMINUW = VEX.NDS.128.66.0F38.WIG 3A /r */
29059 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29060 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29061 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux8 );
29062 goto decode_success;
29064 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
29065 /* VPMINUW = VEX.NDS.256.66.0F38.WIG 3A /r */
29066 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29067 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29068 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux16 );
29069 goto decode_success;
29071 break;
29073 case 0x3B:
29074 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
29075 /* VPMINUD = VEX.NDS.128.66.0F38.WIG 3B /r */
29076 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29077 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29078 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux4 );
29079 goto decode_success;
29081 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
29082 /* VPMINUD = VEX.NDS.256.66.0F38.WIG 3B /r */
29083 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29084 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29085 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux8 );
29086 goto decode_success;
29088 break;
29090 case 0x3C:
29091 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
29092 /* VPMAXSB = VEX.NDS.128.66.0F38.WIG 3C /r */
29093 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29094 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29095 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx16 );
29096 goto decode_success;
29098 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
29099 /* VPMAXSB = VEX.NDS.256.66.0F38.WIG 3C /r */
29100 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29101 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29102 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx32 );
29103 goto decode_success;
29105 break;
29107 case 0x3D:
29108 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
29109 /* VPMAXSD = VEX.NDS.128.66.0F38.WIG 3D /r */
29110 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29111 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29112 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx4 );
29113 goto decode_success;
29115 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
29116 /* VPMAXSD = VEX.NDS.256.66.0F38.WIG 3D /r */
29117 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29118 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29119 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx8 );
29120 goto decode_success;
29122 break;
29124 case 0x3E:
29125 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
29126 /* VPMAXUW = VEX.NDS.128.66.0F38.WIG 3E /r */
29127 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29128 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29129 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux8 );
29130 goto decode_success;
29132 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
29133 /* VPMAXUW = VEX.NDS.256.66.0F38.WIG 3E /r */
29134 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29135 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29136 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux16 );
29137 goto decode_success;
29139 break;
29141 case 0x3F:
29142 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
29143 /* VPMAXUD = VEX.NDS.128.66.0F38.WIG 3F /r */
29144 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29145 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29146 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux4 );
29147 goto decode_success;
29149 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
29150 /* VPMAXUD = VEX.NDS.256.66.0F38.WIG 3F /r */
29151 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29152 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29153 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux8 );
29154 goto decode_success;
29156 break;
29158 case 0x40:
29159 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
29160 /* VPMULLD = VEX.NDS.128.66.0F38.WIG 40 /r */
29161 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29162 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29163 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x4 );
29164 goto decode_success;
29166 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
29167 /* VPMULLD = VEX.NDS.256.66.0F38.WIG 40 /r */
29168 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29169 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29170 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x8 );
29171 goto decode_success;
29173 break;
29175 case 0x41:
29176 /* VPHMINPOSUW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 41 /r */
29177 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29178 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, True/*isAvx*/ );
29179 goto decode_success;
29181 break;
29183 case 0x45:
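              /* Cases 0x45..0x47 are the AVX2 per-element variable shifts
                 (VPSRLV*/VPSRAVD/VPSLLV*): each lane of the first source is
                 shifted by the count held in the corresponding lane of the
                 second source.  Out-of-range counts give 0 for the logical
                 forms and all-sign-bits for the arithmetic VPSRAVD. */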
29184 /* VPSRLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 45 /r */
29185 /* VPSRLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 45 /r */
29186 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
29187 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvd",
29188 Iop_Shr32, 1==getVexL(pfx) );
29189 *uses_vvvv = True;
29190 goto decode_success;
29192 /* VPSRLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 45 /r */
29193 /* VPSRLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 45 /r */
29194 if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
29195 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvq",
29196 Iop_Shr64, 1==getVexL(pfx) );
29197 *uses_vvvv = True;
29198 goto decode_success;
29200 break;
29202 case 0x46:
29203 /* VPSRAVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 46 /r */
29204 /* VPSRAVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 46 /r */
29205 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
29206 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsravd",
29207 Iop_Sar32, 1==getVexL(pfx) );
29208 *uses_vvvv = True;
29209 goto decode_success;
29211 break;
29213 case 0x47:
29214 /* VPSLLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 47 /r */
29215 /* VPSLLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 47 /r */
29216 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
29217 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvd",
29218 Iop_Shl32, 1==getVexL(pfx) );
29219 *uses_vvvv = True;
29220 goto decode_success;
29222 /* VPSLLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 47 /r */
29223 /* VPSLLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 47 /r */
29224 if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
29225 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvq",
29226 Iop_Shl64, 1==getVexL(pfx) );
29227 *uses_vvvv = True;
29228 goto decode_success;
29230 break;
29232 case 0x58:
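              /* The VPBROADCAST{B,W,D,Q} cases (0x58, 0x59, 0x78, 0x79) all
                 build the broadcast by repeated pairing: the narrower forms
                 pair the scalar up with Iop_8HLto16 / Iop_16HLto32 /
                 Iop_32HLto64 until a 64-bit value is obtained, and then
                 Iop_64HLtoV128 (plus Iop_64x4toV256 for the 256-bit forms)
                 replicates that across the whole vector. */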
29233 /* VPBROADCASTD xmm2/m32, xmm1 = VEX.128.66.0F38.W0 58 /r */
29234 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29235 && 0==getRexW(pfx)/*W0*/) {
29236 UChar modrm = getUChar(delta);
29237 UInt rG = gregOfRexRM(pfx, modrm);
29238 IRTemp t32 = newTemp(Ity_I32);
29239 if (epartIsReg(modrm)) {
29240 UInt rE = eregOfRexRM(pfx, modrm);
29241 delta++;
29242 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29243 assign(t32, getXMMRegLane32(rE, 0));
29244 } else {
29245 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29246 delta += alen;
29247 DIP("vpbroadcastd %s,%s\n", dis_buf, nameXMMReg(rG));
29248 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
29250 IRTemp t64 = newTemp(Ity_I64);
29251 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29252 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29253 putYMMRegLoAndZU(rG, res);
29254 goto decode_success;
29256 /* VPBROADCASTD xmm2/m32, ymm1 = VEX.256.66.0F38.W0 58 /r */
29257 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29258 && 0==getRexW(pfx)/*W0*/) {
29259 UChar modrm = getUChar(delta);
29260 UInt rG = gregOfRexRM(pfx, modrm);
29261 IRTemp t32 = newTemp(Ity_I32);
29262 if (epartIsReg(modrm)) {
29263 UInt rE = eregOfRexRM(pfx, modrm);
29264 delta++;
29265 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29266 assign(t32, getXMMRegLane32(rE, 0));
29267 } else {
29268 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29269 delta += alen;
29270 DIP("vpbroadcastd %s,%s\n", dis_buf, nameYMMReg(rG));
29271 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
29273 IRTemp t64 = newTemp(Ity_I64);
29274 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29275 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29276 mkexpr(t64), mkexpr(t64));
29277 putYMMReg(rG, res);
29278 goto decode_success;
29280 break;
29282 case 0x59:
29283 /* VPBROADCASTQ xmm2/m64, xmm1 = VEX.128.66.0F38.W0 59 /r */
29284 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29285 && 0==getRexW(pfx)/*W0*/) {
29286 UChar modrm = getUChar(delta);
29287 UInt rG = gregOfRexRM(pfx, modrm);
29288 IRTemp t64 = newTemp(Ity_I64);
29289 if (epartIsReg(modrm)) {
29290 UInt rE = eregOfRexRM(pfx, modrm);
29291 delta++;
29292 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29293 assign(t64, getXMMRegLane64(rE, 0));
29294 } else {
29295 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29296 delta += alen;
29297 DIP("vpbroadcastq %s,%s\n", dis_buf, nameXMMReg(rG));
29298 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
29300 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29301 putYMMRegLoAndZU(rG, res);
29302 goto decode_success;
29304 /* VPBROADCASTQ xmm2/m64, ymm1 = VEX.256.66.0F38.W0 59 /r */
29305 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29306 && 0==getRexW(pfx)/*W0*/) {
29307 UChar modrm = getUChar(delta);
29308 UInt rG = gregOfRexRM(pfx, modrm);
29309 IRTemp t64 = newTemp(Ity_I64);
29310 if (epartIsReg(modrm)) {
29311 UInt rE = eregOfRexRM(pfx, modrm);
29312 delta++;
29313 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29314 assign(t64, getXMMRegLane64(rE, 0));
29315 } else {
29316 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29317 delta += alen;
29318 DIP("vpbroadcastq %s,%s\n", dis_buf, nameYMMReg(rG));
29319 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
29321 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29322 mkexpr(t64), mkexpr(t64));
29323 putYMMReg(rG, res);
29324 goto decode_success;
29326 break;
29328 case 0x5A:
29329 /* VBROADCASTI128 m128, ymm1 = VEX.256.66.0F38.WIG 5A /r */
29330 if (have66noF2noF3(pfx)
29331 && 1==getVexL(pfx)/*256*/
29332 && !epartIsReg(getUChar(delta))) {
29333 UChar modrm = getUChar(delta);
29334 UInt rG = gregOfRexRM(pfx, modrm);
29335 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29336 delta += alen;
29337 DIP("vbroadcasti128 %s,%s\n", dis_buf, nameYMMReg(rG));
29338 IRTemp t128 = newTemp(Ity_V128);
29339 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
29340 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
29341 goto decode_success;
29343 break;
29345 case 0x78:
29346 /* VPBROADCASTB xmm2/m8, xmm1 = VEX.128.66.0F38.W0 78 /r */
29347 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29348 && 0==getRexW(pfx)/*W0*/) {
29349 UChar modrm = getUChar(delta);
29350 UInt rG = gregOfRexRM(pfx, modrm);
29351 IRTemp t8 = newTemp(Ity_I8);
29352 if (epartIsReg(modrm)) {
29353 UInt rE = eregOfRexRM(pfx, modrm);
29354 delta++;
29355 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29356 assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
29357 } else {
29358 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29359 delta += alen;
29360 DIP("vpbroadcastb %s,%s\n", dis_buf, nameXMMReg(rG));
29361 assign(t8, loadLE(Ity_I8, mkexpr(addr)));
29363 IRTemp t16 = newTemp(Ity_I16);
29364 assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
29365 IRTemp t32 = newTemp(Ity_I32);
29366 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29367 IRTemp t64 = newTemp(Ity_I64);
29368 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29369 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29370 putYMMRegLoAndZU(rG, res);
29371 goto decode_success;
29373 /* VPBROADCASTB xmm2/m8, ymm1 = VEX.256.66.0F38.W0 78 /r */
29374 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29375 && 0==getRexW(pfx)/*W0*/) {
29376 UChar modrm = getUChar(delta);
29377 UInt rG = gregOfRexRM(pfx, modrm);
29378 IRTemp t8 = newTemp(Ity_I8);
29379 if (epartIsReg(modrm)) {
29380 UInt rE = eregOfRexRM(pfx, modrm);
29381 delta++;
29382 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29383 assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
29384 } else {
29385 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29386 delta += alen;
29387 DIP("vpbroadcastb %s,%s\n", dis_buf, nameYMMReg(rG));
29388 assign(t8, loadLE(Ity_I8, mkexpr(addr)));
29390 IRTemp t16 = newTemp(Ity_I16);
29391 assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
29392 IRTemp t32 = newTemp(Ity_I32);
29393 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29394 IRTemp t64 = newTemp(Ity_I64);
29395 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29396 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29397 mkexpr(t64), mkexpr(t64));
29398 putYMMReg(rG, res);
29399 goto decode_success;
29401 break;
29403 case 0x79:
29404 /* VPBROADCASTW xmm2/m16, xmm1 = VEX.128.66.0F38.W0 79 /r */
29405 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29406 && 0==getRexW(pfx)/*W0*/) {
29407 UChar modrm = getUChar(delta);
29408 UInt rG = gregOfRexRM(pfx, modrm);
29409 IRTemp t16 = newTemp(Ity_I16);
29410 if (epartIsReg(modrm)) {
29411 UInt rE = eregOfRexRM(pfx, modrm);
29412 delta++;
29413 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29414 assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
29415 } else {
29416 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29417 delta += alen;
29418 DIP("vpbroadcastw %s,%s\n", dis_buf, nameXMMReg(rG));
29419 assign(t16, loadLE(Ity_I16, mkexpr(addr)));
29421 IRTemp t32 = newTemp(Ity_I32);
29422 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29423 IRTemp t64 = newTemp(Ity_I64);
29424 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29425 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29426 putYMMRegLoAndZU(rG, res);
29427 goto decode_success;
29429 /* VPBROADCASTW xmm2/m16, ymm1 = VEX.256.66.0F38.W0 79 /r */
29430 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29431 && 0==getRexW(pfx)/*W0*/) {
29432 UChar modrm = getUChar(delta);
29433 UInt rG = gregOfRexRM(pfx, modrm);
29434 IRTemp t16 = newTemp(Ity_I16);
29435 if (epartIsReg(modrm)) {
29436 UInt rE = eregOfRexRM(pfx, modrm);
29437 delta++;
29438 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29439 assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
29440 } else {
29441 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29442 delta += alen;
29443 DIP("vpbroadcastw %s,%s\n", dis_buf, nameYMMReg(rG));
29444 assign(t16, loadLE(Ity_I16, mkexpr(addr)));
29446 IRTemp t32 = newTemp(Ity_I32);
29447 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29448 IRTemp t64 = newTemp(Ity_I64);
29449 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29450 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29451 mkexpr(t64), mkexpr(t64));
29452 putYMMReg(rG, res);
29453 goto decode_success;
29455 break;
29457 case 0x8C:
29458 /* VPMASKMOVD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 8C /r */
29459 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29460 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29461 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29462 /*!isYMM*/False, Ity_I32, /*isLoad*/True );
29463 goto decode_success;
29465 /* VPMASKMOVD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 8C /r */
29466 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29467 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29468 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29469 /*isYMM*/True, Ity_I32, /*isLoad*/True );
29470 goto decode_success;
29472 /* VPMASKMOVQ m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 8C /r */
29473 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29474 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29475 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29476 /*!isYMM*/False, Ity_I64, /*isLoad*/True );
29477 goto decode_success;
29479 /* VPMASKMOVQ m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 8C /r */
29480 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29481 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29482 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29483 /*isYMM*/True, Ity_I64, /*isLoad*/True );
29484 goto decode_success;
29486 break;
29488 case 0x8E:
29489 /* VPMASKMOVD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 8E /r */
29490 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29491 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29492 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29493 /*!isYMM*/False, Ity_I32, /*!isLoad*/False );
29494 goto decode_success;
29496 /* VPMASKMOVD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 8E /r */
29497 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29498 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29499 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29500 /*isYMM*/True, Ity_I32, /*!isLoad*/False );
29501 goto decode_success;
29503 /* VPMASKMOVQ xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W1 8E /r */
29504 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29505 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29506 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29507 /*!isYMM*/False, Ity_I64, /*!isLoad*/False );
29508 goto decode_success;
29510 /* VPMASKMOVQ ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W1 8E /r */
29511 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29512 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29513 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29514 /*isYMM*/True, Ity_I64, /*!isLoad*/False );
29515 goto decode_success;
29517 break;
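              /* Cases 0x90..0x93 are the AVX2 gathers.  dis_VGATHER returns its
                 delta argument unchanged when it rejects an encoding (e.g. if
                 its operand constraints are not satisfied), so the
                 "delta != delta0" test below distinguishes a successful decode
                 from a fall-through to decode_failure. */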
29519 case 0x90:
29520 /* VPGATHERDD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 90 /r */
29521 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29522 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29523 Long delta0 = delta;
29524 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd",
29525 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 );
29526 if (delta != delta0)
29527 goto decode_success;
29529 /* VPGATHERDD ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 90 /r */
29530 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29531 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29532 Long delta0 = delta;
29533 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd",
29534 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 );
29535 if (delta != delta0)
29536 goto decode_success;
29538 /* VPGATHERDQ xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 90 /r */
29539 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29540 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29541 Long delta0 = delta;
29542 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq",
29543 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 );
29544 if (delta != delta0)
29545 goto decode_success;
29547 /* VPGATHERDQ ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 90 /r */
29548 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29549 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29550 Long delta0 = delta;
29551 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq",
29552 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 );
29553 if (delta != delta0)
29554 goto decode_success;
29556 break;
29558 case 0x91:
29559 /* VPGATHERQD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 91 /r */
29560 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29561 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29562 Long delta0 = delta;
29563 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd",
29564 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 );
29565 if (delta != delta0)
29566 goto decode_success;
29568 /* VPGATHERQD xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 91 /r */
29569 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29570 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29571 Long delta0 = delta;
29572 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd",
29573 /*isYMM*/True, /*isVM64x*/True, Ity_I32 );
29574 if (delta != delta0)
29575 goto decode_success;
29577 /* VPGATHERQQ xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 91 /r */
29578 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29579 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29580 Long delta0 = delta;
29581 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq",
29582 /*!isYMM*/False, /*isVM64x*/True, Ity_I64 );
29583 if (delta != delta0)
29584 goto decode_success;
29586 /* VPGATHERQQ ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 91 /r */
29587 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29588 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29589 Long delta0 = delta;
29590 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq",
29591 /*isYMM*/True, /*isVM64x*/True, Ity_I64 );
29592 if (delta != delta0)
29593 goto decode_success;
29595 break;
29597 case 0x92:
29598 /* VGATHERDPS xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 92 /r */
29599 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29600 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29601 Long delta0 = delta;
29602 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps",
29603 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 );
29604 if (delta != delta0)
29605 goto decode_success;
29607 /* VGATHERDPS ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 92 /r */
29608 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29609 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29610 Long delta0 = delta;
29611 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps",
29612 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 );
29613 if (delta != delta0)
29614 goto decode_success;
29616 /* VGATHERDPD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 92 /r */
29617 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29618 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29619 Long delta0 = delta;
29620 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd",
29621 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 );
29622 if (delta != delta0)
29623 goto decode_success;
29625 /* VGATHERDPD ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 92 /r */
29626 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29627 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29628 Long delta0 = delta;
29629 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd",
29630 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 );
29631 if (delta != delta0)
29632 goto decode_success;
29634 break;
29636 case 0x93:
29637 /* VGATHERQPS xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 93 /r */
29638 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29639 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29640 Long delta0 = delta;
29641 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps",
29642 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 );
29643 if (delta != delta0)
29644 goto decode_success;
29646 /* VGATHERQPS xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 93 /r */
29647 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29648 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29649 Long delta0 = delta;
29650 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps",
29651 /*isYMM*/True, /*isVM64x*/True, Ity_I32 );
29652 if (delta != delta0)
29653 goto decode_success;
29655 /* VGATHERQPD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 93 /r */
29656 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29657 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29658 Long delta0 = delta;
29659 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd",
29660 /*!isYMM*/False, /*isVM64x*/True, Ity_I64 );
29661 if (delta != delta0)
29662 goto decode_success;
29664 /* VGATHERQPD ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 93 /r */
29665 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29666 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29667 Long delta0 = delta;
29668 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd",
29669 /*isYMM*/True, /*isVM64x*/True, Ity_I64 );
29670 if (delta != delta0)
29671 goto decode_success;
29673 break;
29675 case 0x96 ... 0x9F:
29676 case 0xA6 ... 0xAF:
29677 case 0xB6 ... 0xBF:
29678 /* VFMADDSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 96 /r */
29679 /* VFMADDSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 96 /r */
29680 /* VFMADDSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 96 /r */
29681 /* VFMADDSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 96 /r */
29682 /* VFMSUBADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 97 /r */
29683 /* VFMSUBADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 97 /r */
29684 /* VFMSUBADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 97 /r */
29685 /* VFMSUBADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 97 /r */
29686 /* VFMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 98 /r */
29687 /* VFMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 98 /r */
29688 /* VFMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 98 /r */
29689 /* VFMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 98 /r */
29690 /* VFMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 99 /r */
29691 /* VFMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 99 /r */
29692 /* VFMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9A /r */
29693 /* VFMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9A /r */
29694 /* VFMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9A /r */
29695 /* VFMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9A /r */
29696 /* VFMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9B /r */
29697 /* VFMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9B /r */
29698 /* VFNMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9C /r */
29699 /* VFNMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9C /r */
29700 /* VFNMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9C /r */
29701 /* VFNMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9C /r */
29702 /* VFNMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9D /r */
29703 /* VFNMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9D /r */
29704 /* VFNMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9E /r */
29705 /* VFNMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9E /r */
29706 /* VFNMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9E /r */
29707 /* VFNMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9E /r */
29708 /* VFNMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9F /r */
29709 /* VFNMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9F /r */
29710 /* VFMADDSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A6 /r */
29711 /* VFMADDSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A6 /r */
29712 /* VFMADDSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A6 /r */
29713 /* VFMADDSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A6 /r */
29714 /* VFMSUBADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A7 /r */
29715 /* VFMSUBADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A7 /r */
29716 /* VFMSUBADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A7 /r */
29717 /* VFMSUBADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A7 /r */
29718 /* VFMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A8 /r */
29719 /* VFMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A8 /r */
29720 /* VFMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A8 /r */
29721 /* VFMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A8 /r */
29722 /* VFMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 A9 /r */
29723 /* VFMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 A9 /r */
29724 /* VFMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AA /r */
29725 /* VFMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AA /r */
29726 /* VFMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AA /r */
29727 /* VFMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AA /r */
29728 /* VFMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AB /r */
29729 /* VFMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AB /r */
29730 /* VFNMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AC /r */
29731 /* VFNMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AC /r */
29732 /* VFNMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AC /r */
29733 /* VFNMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AC /r */
29734 /* VFNMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AD /r */
29735 /* VFNMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AD /r */
29736 /* VFNMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AE /r */
29737 /* VFNMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AE /r */
29738 /* VFNMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AE /r */
29739 /* VFNMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AE /r */
29740 /* VFNMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AF /r */
29741 /* VFNMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AF /r */
29742 /* VFMADDSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B6 /r */
29743 /* VFMADDSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B6 /r */
29744 /* VFMADDSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B6 /r */
29745 /* VFMADDSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B6 /r */
29746 /* VFMSUBADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B7 /r */
29747 /* VFMSUBADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B7 /r */
29748 /* VFMSUBADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B7 /r */
29749 /* VFMSUBADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B7 /r */
29750 /* VFMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B8 /r */
29751 /* VFMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B8 /r */
29752 /* VFMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B8 /r */
29753 /* VFMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B8 /r */
29754 /* VFMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 B9 /r */
29755 /* VFMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 B9 /r */
29756 /* VFMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BA /r */
29757 /* VFMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BA /r */
29758 /* VFMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BA /r */
29759 /* VFMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BA /r */
29760 /* VFMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BB /r */
29761 /* VFMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BB /r */
29762 /* VFNMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BC /r */
29763 /* VFNMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BC /r */
29764 /* VFNMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BC /r */
29765 /* VFNMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BC /r */
29766 /* VFNMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BD /r */
29767 /* VFNMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BD /r */
29768 /* VFNMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BE /r */
29769 /* VFNMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BE /r */
29770 /* VFNMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BE /r */
29771 /* VFNMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BE /r */
29772 /* VFNMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BF /r */
29773 /* VFNMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BF /r */
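              /* All of the FMA forms above are handled by dis_FMA, which
                 decodes the operand roles from opc: the 132/213/231 digits say
                 which of the three operands are multiplied and which is added
                 (or subtracted) in dst = +/-(a*b) +/- c. */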
29774 if (have66noF2noF3(pfx)) {
29775 delta = dis_FMA( vbi, pfx, delta, opc );
29776 *uses_vvvv = True;
29777 dres->hint = Dis_HintVerbose;
29778 goto decode_success;
29780 break;
29782 case 0xDB:
29783 case 0xDC:
29784 case 0xDD:
29785 case 0xDE:
29786 case 0xDF:
29787 /* VAESIMC xmm2/m128, xmm1 = VEX.128.66.0F38.WIG DB /r */
29788 /* VAESENC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DC /r */
29789 /* VAESENCLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DD /r */
29790 /* VAESDEC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DE /r */
29791 /* VAESDECLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DF /r */
29792 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29793              delta = dis_AESx( vbi, pfx, delta, True/*isAvx*/, opc );
29794 if (opc != 0xDB) *uses_vvvv = True;
29795 goto decode_success;
29797 break;
29799 case 0xF2:
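              /* ANDN computes dst = ~vvvv & r/m.  BMI1 defines SF and ZF from
                 the result and clears OF and CF; the thunk below records the
                 result in DEP1 so the flags can be recomputed lazily by the
                 AMD64G_CC_OP_ANDN* handling. */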
29800 /* ANDN r/m32, r32b, r32a = VEX.NDS.LZ.0F38.W0 F2 /r */
29801 /* ANDN r/m64, r64b, r64a = VEX.NDS.LZ.0F38.W1 F2 /r */
29802 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29803 Int size = getRexW(pfx) ? 8 : 4;
29804 IRType ty = szToITy(size);
29805 IRTemp dst = newTemp(ty);
29806 IRTemp src1 = newTemp(ty);
29807 IRTemp src2 = newTemp(ty);
29808 UChar rm = getUChar(delta);
29810 assign( src1, getIRegV(size,pfx) );
29811 if (epartIsReg(rm)) {
29812 assign( src2, getIRegE(size,pfx,rm) );
29813 DIP("andn %s,%s,%s\n", nameIRegE(size,pfx,rm),
29814 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
29815 delta++;
29816 } else {
29817 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29818 assign( src2, loadLE(ty, mkexpr(addr)) );
29819 DIP("andn %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
29820 nameIRegG(size,pfx,rm));
29821 delta += alen;
29824 assign( dst, binop( mkSizedOp(ty,Iop_And8),
29825 unop( mkSizedOp(ty,Iop_Not8), mkexpr(src1) ),
29826 mkexpr(src2) ) );
29827 putIRegG( size, pfx, rm, mkexpr(dst) );
29828 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29829 ? AMD64G_CC_OP_ANDN64
29830 : AMD64G_CC_OP_ANDN32)) );
29831 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29832 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
29833 *uses_vvvv = True;
29834 goto decode_success;
29836 break;
29838 case 0xF3:
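              /* Opcode F3 is the BMI1 unary group, selected by the reg field of
                 the ModRM byte: /3 BLSI isolates the lowest set bit
                 (src & -src), /2 BLSMSK builds a mask up to and including the
                 lowest set bit (src ^ (src-1)), and /1 BLSR clears the lowest
                 set bit (src & (src-1)). */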
29839 /* BLSI r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /3 */
29840 /* BLSI r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /3 */
29841 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
29842 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 3) {
29843 Int size = getRexW(pfx) ? 8 : 4;
29844 IRType ty = szToITy(size);
29845 IRTemp src = newTemp(ty);
29846 IRTemp dst = newTemp(ty);
29847 UChar rm = getUChar(delta);
29849 if (epartIsReg(rm)) {
29850 assign( src, getIRegE(size,pfx,rm) );
29851 DIP("blsi %s,%s\n", nameIRegE(size,pfx,rm),
29852 nameIRegV(size,pfx));
29853 delta++;
29854 } else {
29855 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29856 assign( src, loadLE(ty, mkexpr(addr)) );
29857 DIP("blsi %s,%s\n", dis_buf, nameIRegV(size,pfx));
29858 delta += alen;
29861 assign( dst, binop(mkSizedOp(ty,Iop_And8),
29862 binop(mkSizedOp(ty,Iop_Sub8), mkU(ty, 0),
29863 mkexpr(src)), mkexpr(src)) );
29864 putIRegV( size, pfx, mkexpr(dst) );
29865 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29866 ? AMD64G_CC_OP_BLSI64
29867 : AMD64G_CC_OP_BLSI32)) );
29868 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29869 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
29870 *uses_vvvv = True;
29871 goto decode_success;
29873 /* BLSMSK r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /2 */
29874 /* BLSMSK r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /2 */
29875 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
29876 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 2) {
29877 Int size = getRexW(pfx) ? 8 : 4;
29878 IRType ty = szToITy(size);
29879 IRTemp src = newTemp(ty);
29880 IRTemp dst = newTemp(ty);
29881 UChar rm = getUChar(delta);
29883 if (epartIsReg(rm)) {
29884 assign( src, getIRegE(size,pfx,rm) );
29885 DIP("blsmsk %s,%s\n", nameIRegE(size,pfx,rm),
29886 nameIRegV(size,pfx));
29887 delta++;
29888 } else {
29889 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29890 assign( src, loadLE(ty, mkexpr(addr)) );
29891 DIP("blsmsk %s,%s\n", dis_buf, nameIRegV(size,pfx));
29892 delta += alen;
29895 assign( dst, binop(mkSizedOp(ty,Iop_Xor8),
29896 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src),
29897 mkU(ty, 1)), mkexpr(src)) );
29898 putIRegV( size, pfx, mkexpr(dst) );
29899 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29900 ? AMD64G_CC_OP_BLSMSK64
29901 : AMD64G_CC_OP_BLSMSK32)) );
29902 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29903 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
29904 *uses_vvvv = True;
29905 goto decode_success;
29907 /* BLSR r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /1 */
29908 /* BLSR r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /1 */
29909 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
29910 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 1) {
29911 Int size = getRexW(pfx) ? 8 : 4;
29912 IRType ty = szToITy(size);
29913 IRTemp src = newTemp(ty);
29914 IRTemp dst = newTemp(ty);
29915 UChar rm = getUChar(delta);
29917 if (epartIsReg(rm)) {
29918 assign( src, getIRegE(size,pfx,rm) );
29919 DIP("blsr %s,%s\n", nameIRegE(size,pfx,rm),
29920 nameIRegV(size,pfx));
29921 delta++;
29922 } else {
29923 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29924 assign( src, loadLE(ty, mkexpr(addr)) );
29925 DIP("blsr %s,%s\n", dis_buf, nameIRegV(size,pfx));
29926 delta += alen;
29929 assign( dst, binop(mkSizedOp(ty,Iop_And8),
29930 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src),
29931 mkU(ty, 1)), mkexpr(src)) );
29932 putIRegV( size, pfx, mkexpr(dst) );
29933 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29934 ? AMD64G_CC_OP_BLSR64
29935 : AMD64G_CC_OP_BLSR32)) );
29936 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29937 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
29938 *uses_vvvv = True;
29939 goto decode_success;
29941 break;
29943 case 0xF5:
29944 /* BZHI r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F5 /r */
29945 /* BZHI r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F5 /r */
29946 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29947 Int size = getRexW(pfx) ? 8 : 4;
29948 IRType ty = szToITy(size);
29949 IRTemp dst = newTemp(ty);
29950 IRTemp src1 = newTemp(ty);
29951 IRTemp src2 = newTemp(ty);
29952 IRTemp start = newTemp(Ity_I8);
29953 IRTemp cond = newTemp(Ity_I1);
29954 UChar rm = getUChar(delta);
29956 assign( src2, getIRegV(size,pfx) );
29957 if (epartIsReg(rm)) {
29958 assign( src1, getIRegE(size,pfx,rm) );
29959 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx),
29960 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
29961 delta++;
29962 } else {
29963 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29964 assign( src1, loadLE(ty, mkexpr(addr)) );
29965 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx), dis_buf,
29966 nameIRegG(size,pfx,rm));
29967 delta += alen;
29970 assign( start, narrowTo( Ity_I8, mkexpr(src2) ) );
29971 assign( cond, binop(Iop_CmpLT32U,
29972 unop(Iop_8Uto32, mkexpr(start)),
29973 mkU32(8*size)) );
29974 /* if (start < opsize) {
29975 if (start == 0)
29976 dst = 0;
29977 else
29978 dst = (src1 << (opsize-start)) u>> (opsize-start);
29979 } else {
29980 dst = src1;
29981 } */
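                 /* Illustration: for size == 4 (opsize == 32) and start == 8,
                    dst = (src1 << 24) u>> 24, i.e. only bits 7:0 of src1
                    survive; start >= 32 leaves src1 unchanged. */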
29982                 assign( dst,
29983                         IRExpr_ITE(
29984                            mkexpr(cond),
29985                            IRExpr_ITE(
29986                               binop(Iop_CmpEQ8, mkexpr(start), mkU8(0)),
29987                               mkU(ty, 0),
29988                               binop(
29989                                  mkSizedOp(ty,Iop_Shr8),
29990                                  binop(
29991                                     mkSizedOp(ty,Iop_Shl8),
29992                                     mkexpr(src1),
29993                                     binop(Iop_Sub8, mkU8(8*size), mkexpr(start))
29994                                  ),
29995                                  binop(Iop_Sub8, mkU8(8*size), mkexpr(start))
29996                               )
29997                            ),
29998                            mkexpr(src1)
29999                         )
30000                       );
30001 putIRegG( size, pfx, rm, mkexpr(dst) );
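              /* Note: no BZHI-specific CC_OP is defined; the BLSR thunk values
                 are reused below, with DEP1 = result and DEP2 = the in-range
                 condition, which presumably lets the existing flag helper
                 reproduce BZHI's SF/ZF/CF behaviour. */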
30002 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30003 ? AMD64G_CC_OP_BLSR64
30004 : AMD64G_CC_OP_BLSR32)) );
30005 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30006 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(cond))) );
30007 *uses_vvvv = True;
30008 goto decode_success;
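              /* PDEP scatters the low-order bits of the source into the bit
                 positions selected by the mask; PEXT is the inverse, gathering
                 the bits selected by the mask down into the low-order end of
                 the result.  Both are implemented by calling out to the clean
                 helpers amd64g_calculate_pdep/pext rather than being expanded
                 into IR. */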
30010 /* PDEP r/m32, r32b, r32a = VEX.NDS.LZ.F2.0F38.W0 F5 /r */
30011 /* PDEP r/m64, r64b, r64a = VEX.NDS.LZ.F2.0F38.W1 F5 /r */
30012 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30013 Int size = getRexW(pfx) ? 8 : 4;
30014 IRType ty = szToITy(size);
30015 IRTemp src = newTemp(ty);
30016 IRTemp mask = newTemp(ty);
30017 UChar rm = getUChar(delta);
30019 assign( src, getIRegV(size,pfx) );
30020 if (epartIsReg(rm)) {
30021 assign( mask, getIRegE(size,pfx,rm) );
30022 DIP("pdep %s,%s,%s\n", nameIRegE(size,pfx,rm),
30023 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
30024 delta++;
30025 } else {
30026 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30027 assign( mask, loadLE(ty, mkexpr(addr)) );
30028 DIP("pdep %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
30029 nameIRegG(size,pfx,rm));
30030 delta += alen;
30033 IRExpr** args = mkIRExprVec_2( widenUto64(mkexpr(src)),
30034 widenUto64(mkexpr(mask)) );
30035 putIRegG( size, pfx, rm,
30036 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/,
30037 "amd64g_calculate_pdep",
30038 &amd64g_calculate_pdep, args)) );
30039 *uses_vvvv = True;
30040 /* Flags aren't modified. */
30041 goto decode_success;
30043 /* PEXT r/m32, r32b, r32a = VEX.NDS.LZ.F3.0F38.W0 F5 /r */
30044 /* PEXT r/m64, r64b, r64a = VEX.NDS.LZ.F3.0F38.W1 F5 /r */
30045 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30046 Int size = getRexW(pfx) ? 8 : 4;
30047 IRType ty = szToITy(size);
30048 IRTemp src = newTemp(ty);
30049 IRTemp mask = newTemp(ty);
30050 UChar rm = getUChar(delta);
30052 assign( src, getIRegV(size,pfx) );
30053 if (epartIsReg(rm)) {
30054 assign( mask, getIRegE(size,pfx,rm) );
30055 DIP("pext %s,%s,%s\n", nameIRegE(size,pfx,rm),
30056 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
30057 delta++;
30058 } else {
30059 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30060 assign( mask, loadLE(ty, mkexpr(addr)) );
30061 DIP("pext %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
30062 nameIRegG(size,pfx,rm));
30063 delta += alen;
30066 /* First mask off bits not set in mask, they are ignored
30067 and it should be fine if they contain undefined values. */
30068 IRExpr* masked = binop(mkSizedOp(ty,Iop_And8),
30069 mkexpr(src), mkexpr(mask));
30070 IRExpr** args = mkIRExprVec_2( widenUto64(masked),
30071 widenUto64(mkexpr(mask)) );
30072 putIRegG( size, pfx, rm,
30073 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/,
30074 "amd64g_calculate_pext",
30075 &amd64g_calculate_pext, args)) );
30076 *uses_vvvv = True;
30077 /* Flags aren't modified. */
30078 goto decode_success;
30080 break;
30082 case 0xF6:
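              /* MULX is an unsigned widening multiply of RDX (or EDX) by r/m:
                 the low half of the double-width product goes to the vvvv
                 register and the high half to the ModRM reg register.  Unlike
                 MUL it leaves the flags untouched, which is the point of the
                 instruction. */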
30083 /* MULX r/m32, r32b, r32a = VEX.NDD.LZ.F2.0F38.W0 F6 /r */
30084 /* MULX r/m64, r64b, r64a = VEX.NDD.LZ.F2.0F38.W1 F6 /r */
30085 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30086 Int size = getRexW(pfx) ? 8 : 4;
30087 IRType ty = szToITy(size);
30088 IRTemp src1 = newTemp(ty);
30089 IRTemp src2 = newTemp(ty);
30090 IRTemp res = newTemp(size == 8 ? Ity_I128 : Ity_I64);
30091 UChar rm = getUChar(delta);
30093 assign( src1, getIRegRDX(size) );
30094 if (epartIsReg(rm)) {
30095 assign( src2, getIRegE(size,pfx,rm) );
30096 DIP("mulx %s,%s,%s\n", nameIRegE(size,pfx,rm),
30097 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
30098 delta++;
30099 } else {
30100 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30101 assign( src2, loadLE(ty, mkexpr(addr)) );
30102 DIP("mulx %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
30103 nameIRegG(size,pfx,rm));
30104 delta += alen;
30107 assign( res, binop(size == 8 ? Iop_MullU64 : Iop_MullU32,
30108 mkexpr(src1), mkexpr(src2)) );
30109 putIRegV( size, pfx,
30110 unop(size == 8 ? Iop_128to64 : Iop_64to32, mkexpr(res)) );
30111 putIRegG( size, pfx, rm,
30112 unop(size == 8 ? Iop_128HIto64 : Iop_64HIto32,
30113 mkexpr(res)) );
30114 *uses_vvvv = True;
30115 /* Flags aren't modified. */
30116 goto decode_success;
30118 break;
30120 case 0xF7:
30121 /* SARX r32b, r/m32, r32a = VEX.NDS.LZ.F3.0F38.W0 F7 /r */
30122 /* SARX r64b, r/m64, r64a = VEX.NDS.LZ.F3.0F38.W1 F7 /r */
30123 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30124 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "sarx", Iop_Sar8 );
30125 goto decode_success;
30127 /* SHLX r32b, r/m32, r32a = VEX.NDS.LZ.66.0F38.W0 F7 /r */
30128 /* SHLX r64b, r/m64, r64a = VEX.NDS.LZ.66.0F38.W1 F7 /r */
30129 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30130 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shlx", Iop_Shl8 );
30131 goto decode_success;
30133 /* SHRX r32b, r/m32, r32a = VEX.NDS.LZ.F2.0F38.W0 F7 /r */
30134 /* SHRX r64b, r/m64, r64a = VEX.NDS.LZ.F2.0F38.W1 F7 /r */
30135 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30136 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shrx", Iop_Shr8 );
30137 goto decode_success;
30139 /* BEXTR r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F7 /r */
30140 /* BEXTR r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F7 /r */
30141 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30142 Int size = getRexW(pfx) ? 8 : 4;
30143 IRType ty = szToITy(size);
30144 IRTemp dst = newTemp(ty);
30145 IRTemp src1 = newTemp(ty);
30146 IRTemp src2 = newTemp(ty);
30147 IRTemp stle = newTemp(Ity_I16);
30148 IRTemp start = newTemp(Ity_I8);
30149 IRTemp len = newTemp(Ity_I8);
30150 UChar rm = getUChar(delta);
30152 assign( src2, getIRegV(size,pfx) );
30153 if (epartIsReg(rm)) {
30154 assign( src1, getIRegE(size,pfx,rm) );
30155 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx),
30156 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
30157 delta++;
30158 } else {
30159 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30160 assign( src1, loadLE(ty, mkexpr(addr)) );
30161 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx), dis_buf,
30162 nameIRegG(size,pfx,rm));
30163 delta += alen;
30166 assign( stle, narrowTo( Ity_I16, mkexpr(src2) ) );
30167 assign( start, unop( Iop_16to8, mkexpr(stle) ) );
30168 assign( len, unop( Iop_16HIto8, mkexpr(stle) ) );
30169 /* if (start+len < opsize) {
30170 if (len != 0)
30171 dst = (src1 << (opsize-start-len)) u>> (opsize-len);
30172 else
30173 dst = 0;
30174 } else {
30175 if (start < opsize)
30176 dst = src1 u>> start;
30177 else
30178 dst = 0;
30179 } */
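         /* Illustrative example (added comment): when start+len is in range
            this is just  dst = (src1 u>> start) & ((1 << len) - 1).  E.g.
            with a 32-bit operand, src1 = 0x12345678 and src2 = 0x0C08
            (start = 8, len = 12) selects bit field [19:8] of src1, giving
            dst = 0x456.  A start of 32 or more yields 0 (the final arm
            below), as does len = 0. */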
30180 assign( dst,
30181 IRExpr_ITE(
30182 binop(Iop_CmpLT32U,
30183 binop(Iop_Add32,
30184 unop(Iop_8Uto32, mkexpr(start)),
30185 unop(Iop_8Uto32, mkexpr(len))),
30186 mkU32(8*size)),
30187 IRExpr_ITE(
30188 binop(Iop_CmpEQ8, mkexpr(len), mkU8(0)),
30189 mkU(ty, 0),
30190 binop(mkSizedOp(ty,Iop_Shr8),
30191 binop(mkSizedOp(ty,Iop_Shl8), mkexpr(src1),
30192 binop(Iop_Sub8,
30193 binop(Iop_Sub8, mkU8(8*size),
30194 mkexpr(start)),
30195 mkexpr(len))),
30196 binop(Iop_Sub8, mkU8(8*size),
30197 mkexpr(len)))
30199 IRExpr_ITE(
30200 binop(Iop_CmpLT32U,
30201 unop(Iop_8Uto32, mkexpr(start)),
30202 mkU32(8*size)),
30203 binop(mkSizedOp(ty,Iop_Shr8), mkexpr(src1),
30204 mkexpr(start)),
30205 mkU(ty, 0)
30209 putIRegG( size, pfx, rm, mkexpr(dst) );
30210 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30211 ? AMD64G_CC_OP_ANDN64
30212 : AMD64G_CC_OP_ANDN32)) );
30213 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30214 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
30215 *uses_vvvv = True;
30216 goto decode_success;
30218 break;
30220 default:
30221 break;
30225 //decode_failure:
30226 return deltaIN;
30228 decode_success:
30229 return delta;
30232 /* operand format:
30233 * [0] = dst
30234    * [n] = srcn
30235    */
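/* Added note: for the 4-operand (FMA4) forms handled below, the fourth XMM
   register is not encoded in VEX.vvvv or modrm but in the top nibble of the
   trailing "is4" immediate byte -- hence the getUChar(delta + 1) >> 4 read
   in the count == 4 case. */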
30236 static Long decode_vregW(Int count, Long delta, UChar modrm, Prefix pfx,
30237 const VexAbiInfo* vbi, IRTemp *v, UInt *dst, Int swap)
30239 v[0] = newTemp(Ity_V128);
30240 v[1] = newTemp(Ity_V128);
30241 v[2] = newTemp(Ity_V128);
30242 v[3] = newTemp(Ity_V128);
30243 IRTemp addr = IRTemp_INVALID;
30244 Int alen = 0;
30245 HChar dis_buf[50];
30247 *dst = gregOfRexRM(pfx, modrm);
30248 assign( v[0], getXMMReg(*dst) );
30250 if ( epartIsReg( modrm ) ) {
30251 UInt ereg = eregOfRexRM(pfx, modrm);
30252 assign(swap ? v[count-1] : v[count-2], getXMMReg(ereg) );
30253 DIS(dis_buf, "%s", nameXMMReg(ereg));
30254 } else {
30255 Bool extra_byte = (getUChar(delta - 3) & 0xF) != 9;
30256 addr = disAMode(&alen, vbi, pfx, delta, dis_buf, extra_byte);
30257 assign(swap ? v[count-1] : v[count-2], loadLE(Ity_V128, mkexpr(addr)));
30258 delta += alen - 1;
30261 UInt vvvv = getVexNvvvv(pfx);
30262 switch(count) {
30263 case 2:
30264 DIP( "%s,%s", nameXMMReg(*dst), dis_buf );
30265 break;
30266 case 3:
30267 assign( swap ? v[1] : v[2], getXMMReg(vvvv) );
30268 DIP( "%s,%s,%s", nameXMMReg(*dst), nameXMMReg(vvvv), dis_buf );
30269 break;
30270 case 4:
30272 assign( v[1], getXMMReg(vvvv) );
30273 UInt src2 = getUChar(delta + 1) >> 4;
30274 assign( swap ? v[2] : v[3], getXMMReg(src2) );
30275 DIP( "%s,%s,%s,%s", nameXMMReg(*dst), nameXMMReg(vvvv),
30276 nameXMMReg(src2), dis_buf );
30278 break;
30280 return delta + 1;
30283 static Long dis_FMA4 (Prefix pfx, Long delta, UChar opc,
30284 Bool* uses_vvvv, const VexAbiInfo* vbi )
30286 UInt dst;
30287 *uses_vvvv = True;
30289 UChar modrm = getUChar(delta);
30291 Bool zero_64F = False;
30292 Bool zero_96F = False;
30293 UInt is_F32 = ((opc & 0x01) == 0x00) ? 1 : 0;
30294 Bool neg = (opc & 0xF0) == 0x70;
30295 Bool alt = (opc & 0xF0) == 0x50;
30296 Bool sub = alt ? (opc & 0x0E) != 0x0E : (opc & 0x0C) == 0x0C;
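   /* Added note: the FMA4 opcode byte is decoded field-wise here.  Bit 0
      selects double (1) vs single (0) precision; the 0x5x group are the
      vfmaddsub/vfmsubadd forms ('alt', which makes 'sub' flip per lane in
      the loop below); the 0x7x group are the negated vfnm* forms ('neg');
      and a low nibble of 0xA/0xB/0xE/0xF outside the 0x5x group marks the
      scalar ss/sd variants, for which zero_96F/zero_64F request zeroing of
      the unused upper lanes of the destination. */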
30298 IRTemp operand[4];
30299 switch(opc & 0xF) {
30300 case 0x0A: zero_96F = (opc >> 4) != 0x05; break;
30301 case 0x0B: zero_64F = (opc >> 4) != 0x05; break;
30302 case 0x0E: zero_96F = (opc >> 4) != 0x05; break;
30303 case 0x0F: zero_64F = (opc >> 4) != 0x05; break;
30304 default: break;
30306    DIP("vf%sm", neg ? "n" : "");
30307 if(alt) DIP("%s", sub ? "add" : "sub");
30308 DIP("%s", sub ? "sub" : "add");
30309 DIP("%c ", (zero_64F || zero_96F) ? 's' : 'p');
30310 DIP("%c ", is_F32 ? 's' : 'd');
30311 delta = decode_vregW(4, delta, modrm, pfx, vbi, operand, &dst, getRexW(pfx));
30312 DIP("\n");
30313 IRExpr *src[3];
30315 void (*putXMM[2])(UInt,Int,IRExpr*) = {&putXMMRegLane64F, &putXMMRegLane32F};
30317 IROp size_op[] = {Iop_V128to64, Iop_V128HIto64, Iop_64to32, Iop_64HIto32};
30318 IROp neg_op[] = {Iop_NegF64, Iop_NegF32};
30319 int i, j;
30320 for(i = 0; i < is_F32 * 2 + 2; i++) {
30321 for(j = 0; j < 3; j++) {
30322 if(is_F32) {
30323 src[j] = unop(Iop_ReinterpI32asF32,
30324 unop(size_op[i%2+2],
30325 unop(size_op[i/2],
30326 mkexpr(operand[j + 1])
30329 } else {
30330 src[j] = unop(Iop_ReinterpI64asF64,
30331 unop(size_op[i%2],
30332 mkexpr(operand[j + 1])
30336 putXMM[is_F32](dst, i, IRExpr_Qop(is_F32 ? Iop_MAddF32 : Iop_MAddF64,
30337 get_FAKE_roundingmode(),
30338 neg ? unop(neg_op[is_F32], src[0])
30339 : src[0],
30340 src[1],
30341 sub ? unop(neg_op[is_F32], src[2])
30342 : src[2]
30344 if(alt) {
30345 sub = !sub;
30349 /* Zero out top bits of ymm/xmm register. */
30350 putYMMRegLane128( dst, 1, mkV128(0) );
30352 if(zero_64F || zero_96F) {
30353 putXMMRegLane64( dst, 1, IRExpr_Const(IRConst_U64(0)));
30356 if(zero_96F) {
30357 putXMMRegLane32( dst, 1, IRExpr_Const(IRConst_U32(0)));
30360 return delta+1;
30363 /*------------------------------------------------------------*/
30364 /*--- ---*/
30365 /*--- Top-level post-escape decoders: dis_ESC_0F3A__VEX ---*/
30366 /*--- ---*/
30367 /*------------------------------------------------------------*/
30369 static IRTemp math_VPERMILPS_128 ( IRTemp sV, UInt imm8 )
30371 vassert(imm8 < 256);
30372 IRTemp s3, s2, s1, s0;
30373 s3 = s2 = s1 = s0 = IRTemp_INVALID;
30374 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
30375 # define SEL(_nn) (((_nn)==0) ? s0 : ((_nn)==1) ? s1 \
30376 : ((_nn)==2) ? s2 : s3)
30377 IRTemp res = newTemp(Ity_V128);
30378 assign(res, mkV128from32s( SEL((imm8 >> 6) & 3),
30379 SEL((imm8 >> 4) & 3),
30380 SEL((imm8 >> 2) & 3),
30381 SEL((imm8 >> 0) & 3) ));
30382 # undef SEL
30383 return res;
30386 __attribute__((noinline))
30387 static
30388 Long dis_ESC_0F3A__VEX (
30389 /*MB_OUT*/DisResult* dres,
30390 /*OUT*/ Bool* uses_vvvv,
30391 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
30392 Bool resteerCisOk,
30393 void* callback_opaque,
30394 const VexArchInfo* archinfo,
30395 const VexAbiInfo* vbi,
30396 Prefix pfx, Int sz, Long deltaIN
30399 IRTemp addr = IRTemp_INVALID;
30400 Int alen = 0;
30401 HChar dis_buf[50];
30402 Long delta = deltaIN;
30403 UChar opc = getUChar(delta);
30404 delta++;
30405 *uses_vvvv = False;
30407 switch (opc) {
30409 case 0x00:
30410 case 0x01:
30411 /* VPERMQ imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 00 /r ib */
30412 /* VPERMPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 01 /r ib */
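      /* Illustrative example (added comment): each 2-bit field of imm8
         selects which source quadword lands in the corresponding destination
         lane, so e.g. imm8 = 0x1B (binary 00 01 10 11) reverses the four
         64-bit lanes. */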
30413 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
30414 && 1==getRexW(pfx)/*W1*/) {
30415 UChar modrm = getUChar(delta);
30416 UInt imm8 = 0;
30417 UInt rG = gregOfRexRM(pfx, modrm);
30418 IRTemp sV = newTemp(Ity_V256);
30419 const HChar *name = opc == 0 ? "vpermq" : "vpermpd";
30420 if (epartIsReg(modrm)) {
30421 UInt rE = eregOfRexRM(pfx, modrm);
30422 delta += 1;
30423 imm8 = getUChar(delta);
30424 DIP("%s $%u,%s,%s\n",
30425 name, imm8, nameYMMReg(rE), nameYMMReg(rG));
30426 assign(sV, getYMMReg(rE));
30427 } else {
30428 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30429 delta += alen;
30430 imm8 = getUChar(delta);
30431 DIP("%s $%u,%s,%s\n",
30432 name, imm8, dis_buf, nameYMMReg(rG));
30433 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
30435 delta++;
30436 IRTemp s[4];
30437 s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
30438 breakupV256to64s(sV, &s[3], &s[2], &s[1], &s[0]);
30439 IRTemp dV = newTemp(Ity_V256);
30440 assign(dV, IRExpr_Qop(Iop_64x4toV256,
30441 mkexpr(s[(imm8 >> 6) & 3]),
30442 mkexpr(s[(imm8 >> 4) & 3]),
30443 mkexpr(s[(imm8 >> 2) & 3]),
30444 mkexpr(s[(imm8 >> 0) & 3])));
30445 putYMMReg(rG, mkexpr(dV));
30446 goto decode_success;
30448 break;
30450 case 0x02:
30451 /* VPBLENDD imm8, xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 02 /r ib */
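      /* Added note: each imm8 bit i selects, for 32-bit lane i of the
         result, the xmm3/m128 operand when the bit is set and the xmm2
         (vvvv) operand when it is clear. */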
30452 if (have66noF2noF3(pfx)
30453 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
30454 UChar modrm = getUChar(delta);
30455 UInt imm8 = 0;
30456 UInt rG = gregOfRexRM(pfx, modrm);
30457 UInt rV = getVexNvvvv(pfx);
30458 IRTemp sV = newTemp(Ity_V128);
30459 IRTemp dV = newTemp(Ity_V128);
30460 UInt i;
30461 IRTemp s[4], d[4];
30462 assign(sV, getXMMReg(rV));
30463 if (epartIsReg(modrm)) {
30464 UInt rE = eregOfRexRM(pfx, modrm);
30465 delta += 1;
30466 imm8 = getUChar(delta);
30467 DIP("vpblendd $%u,%s,%s,%s\n",
30468 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
30469 assign(dV, getXMMReg(rE));
30470 } else {
30471 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30472 delta += alen;
30473 imm8 = getUChar(delta);
30474 DIP("vpblendd $%u,%s,%s,%s\n",
30475 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
30476 assign(dV, loadLE(Ity_V128, mkexpr(addr)));
30478 delta++;
30479 for (i = 0; i < 4; i++) {
30480 s[i] = IRTemp_INVALID;
30481 d[i] = IRTemp_INVALID;
30483 breakupV128to32s( sV, &s[3], &s[2], &s[1], &s[0] );
30484 breakupV128to32s( dV, &d[3], &d[2], &d[1], &d[0] );
30485 for (i = 0; i < 4; i++)
30486 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i]));
30487 putYMMRegLane128(rG, 1, mkV128(0));
30488 *uses_vvvv = True;
30489 goto decode_success;
30491 /* VPBLENDD imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 02 /r ib */
30492 if (have66noF2noF3(pfx)
30493 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30494 UChar modrm = getUChar(delta);
30495 UInt imm8 = 0;
30496 UInt rG = gregOfRexRM(pfx, modrm);
30497 UInt rV = getVexNvvvv(pfx);
30498 IRTemp sV = newTemp(Ity_V256);
30499 IRTemp dV = newTemp(Ity_V256);
30500 UInt i;
30501 IRTemp s[8], d[8];
30502 assign(sV, getYMMReg(rV));
30503 if (epartIsReg(modrm)) {
30504 UInt rE = eregOfRexRM(pfx, modrm);
30505 delta += 1;
30506 imm8 = getUChar(delta);
30507 DIP("vpblendd $%u,%s,%s,%s\n",
30508 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30509 assign(dV, getYMMReg(rE));
30510 } else {
30511 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30512 delta += alen;
30513 imm8 = getUChar(delta);
30514 DIP("vpblendd $%u,%s,%s,%s\n",
30515 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30516 assign(dV, loadLE(Ity_V256, mkexpr(addr)));
30518 delta++;
30519 for (i = 0; i < 8; i++) {
30520 s[i] = IRTemp_INVALID;
30521 d[i] = IRTemp_INVALID;
30523 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
30524 &s[3], &s[2], &s[1], &s[0] );
30525 breakupV256to32s( dV, &d[7], &d[6], &d[5], &d[4],
30526 &d[3], &d[2], &d[1], &d[0] );
30527 for (i = 0; i < 8; i++)
30528 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i]));
30529 *uses_vvvv = True;
30530 goto decode_success;
30532 break;
30534 case 0x04:
30535 /* VPERMILPS imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 04 /r ib */
30536 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30537 UChar modrm = getUChar(delta);
30538 UInt imm8 = 0;
30539 UInt rG = gregOfRexRM(pfx, modrm);
30540 IRTemp sV = newTemp(Ity_V256);
30541 if (epartIsReg(modrm)) {
30542 UInt rE = eregOfRexRM(pfx, modrm);
30543 delta += 1;
30544 imm8 = getUChar(delta);
30545 DIP("vpermilps $%u,%s,%s\n",
30546 imm8, nameYMMReg(rE), nameYMMReg(rG));
30547 assign(sV, getYMMReg(rE));
30548 } else {
30549 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30550 delta += alen;
30551 imm8 = getUChar(delta);
30552 DIP("vpermilps $%u,%s,%s\n",
30553 imm8, dis_buf, nameYMMReg(rG));
30554 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
30556 delta++;
30557 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
30558 breakupV256toV128s( sV, &sVhi, &sVlo );
30559 IRTemp dVhi = math_VPERMILPS_128( sVhi, imm8 );
30560 IRTemp dVlo = math_VPERMILPS_128( sVlo, imm8 );
30561 IRExpr* res = binop(Iop_V128HLtoV256, mkexpr(dVhi), mkexpr(dVlo));
30562 putYMMReg(rG, res);
30563 goto decode_success;
30565 /* VPERMILPS imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 04 /r ib */
30566 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30567 UChar modrm = getUChar(delta);
30568 UInt imm8 = 0;
30569 UInt rG = gregOfRexRM(pfx, modrm);
30570 IRTemp sV = newTemp(Ity_V128);
30571 if (epartIsReg(modrm)) {
30572 UInt rE = eregOfRexRM(pfx, modrm);
30573 delta += 1;
30574 imm8 = getUChar(delta);
30575 DIP("vpermilps $%u,%s,%s\n",
30576 imm8, nameXMMReg(rE), nameXMMReg(rG));
30577 assign(sV, getXMMReg(rE));
30578 } else {
30579 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30580 delta += alen;
30581 imm8 = getUChar(delta);
30582 DIP("vpermilps $%u,%s,%s\n",
30583 imm8, dis_buf, nameXMMReg(rG));
30584 assign(sV, loadLE(Ity_V128, mkexpr(addr)));
30586 delta++;
30587 putYMMRegLoAndZU(rG, mkexpr ( math_VPERMILPS_128 ( sV, imm8 ) ) );
30588 goto decode_success;
30590 break;
30592 case 0x05:
30593 /* VPERMILPD imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 05 /r ib */
30594 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30595 UChar modrm = getUChar(delta);
30596 UInt imm8 = 0;
30597 UInt rG = gregOfRexRM(pfx, modrm);
30598 IRTemp sV = newTemp(Ity_V128);
30599 if (epartIsReg(modrm)) {
30600 UInt rE = eregOfRexRM(pfx, modrm);
30601 delta += 1;
30602 imm8 = getUChar(delta);
30603 DIP("vpermilpd $%u,%s,%s\n",
30604 imm8, nameXMMReg(rE), nameXMMReg(rG));
30605 assign(sV, getXMMReg(rE));
30606 } else {
30607 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30608 delta += alen;
30609 imm8 = getUChar(delta);
30610 DIP("vpermilpd $%u,%s,%s\n",
30611 imm8, dis_buf, nameXMMReg(rG));
30612 assign(sV, loadLE(Ity_V128, mkexpr(addr)));
30614 delta++;
30615 IRTemp s1 = newTemp(Ity_I64);
30616 IRTemp s0 = newTemp(Ity_I64);
30617 assign(s1, unop(Iop_V128HIto64, mkexpr(sV)));
30618 assign(s0, unop(Iop_V128to64, mkexpr(sV)));
30619 IRTemp dV = newTemp(Ity_V128);
30620 assign(dV, binop(Iop_64HLtoV128,
30621 mkexpr((imm8 & (1<<1)) ? s1 : s0),
30622 mkexpr((imm8 & (1<<0)) ? s1 : s0)));
30623 putYMMRegLoAndZU(rG, mkexpr(dV));
30624 goto decode_success;
30626 /* VPERMILPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 05 /r ib */
30627 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30628 UChar modrm = getUChar(delta);
30629 UInt imm8 = 0;
30630 UInt rG = gregOfRexRM(pfx, modrm);
30631 IRTemp sV = newTemp(Ity_V256);
30632 if (epartIsReg(modrm)) {
30633 UInt rE = eregOfRexRM(pfx, modrm);
30634 delta += 1;
30635 imm8 = getUChar(delta);
30636 DIP("vpermilpd $%u,%s,%s\n",
30637 imm8, nameYMMReg(rE), nameYMMReg(rG));
30638 assign(sV, getYMMReg(rE));
30639 } else {
30640 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30641 delta += alen;
30642 imm8 = getUChar(delta);
30643 DIP("vpermilpd $%u,%s,%s\n",
30644 imm8, dis_buf, nameYMMReg(rG));
30645 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
30647 delta++;
30648 IRTemp s3, s2, s1, s0;
30649 s3 = s2 = s1 = s0 = IRTemp_INVALID;
30650 breakupV256to64s(sV, &s3, &s2, &s1, &s0);
30651 IRTemp dV = newTemp(Ity_V256);
30652 assign(dV, IRExpr_Qop(Iop_64x4toV256,
30653 mkexpr((imm8 & (1<<3)) ? s3 : s2),
30654 mkexpr((imm8 & (1<<2)) ? s3 : s2),
30655 mkexpr((imm8 & (1<<1)) ? s1 : s0),
30656 mkexpr((imm8 & (1<<0)) ? s1 : s0)));
30657 putYMMReg(rG, mkexpr(dV));
30658 goto decode_success;
30660 break;
30662 case 0x06:
30664       /* VPERM2F128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 06 /r ib */
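      /* Added note: imm8 bits 1:0 choose the source 128-bit lane written to
         the destination's low lane and bits 5:4 the one written to its high
         lane (0/1 = low/high lane of the vvvv register, 2/3 = low/high lane
         of the r/m operand); bits 3 and 7 instead zero the respective
         destination lane, as handled after the SEL() selects below. */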
30664 if (have66noF2noF3(pfx)
30665 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30666 UChar modrm = getUChar(delta);
30667 UInt imm8 = 0;
30668 UInt rG = gregOfRexRM(pfx, modrm);
30669 UInt rV = getVexNvvvv(pfx);
30670 IRTemp s00 = newTemp(Ity_V128);
30671 IRTemp s01 = newTemp(Ity_V128);
30672 IRTemp s10 = newTemp(Ity_V128);
30673 IRTemp s11 = newTemp(Ity_V128);
30674 assign(s00, getYMMRegLane128(rV, 0));
30675 assign(s01, getYMMRegLane128(rV, 1));
30676 if (epartIsReg(modrm)) {
30677 UInt rE = eregOfRexRM(pfx, modrm);
30678 delta += 1;
30679 imm8 = getUChar(delta);
30680 DIP("vperm2f128 $%u,%s,%s,%s\n",
30681 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30682 assign(s10, getYMMRegLane128(rE, 0));
30683 assign(s11, getYMMRegLane128(rE, 1));
30684 } else {
30685 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30686 delta += alen;
30687 imm8 = getUChar(delta);
30688 DIP("vperm2f128 $%u,%s,%s,%s\n",
30689 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30690 assign(s10, loadLE(Ity_V128, binop(Iop_Add64,
30691 mkexpr(addr), mkU64(0))));
30692 assign(s11, loadLE(Ity_V128, binop(Iop_Add64,
30693 mkexpr(addr), mkU64(16))));
30695 delta++;
30696 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
30697 : ((_nn)==2) ? s10 : s11)
30698 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3)));
30699 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3)));
30700 # undef SEL
30701 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0));
30702 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0));
30703 *uses_vvvv = True;
30704 goto decode_success;
30706 break;
30708 case 0x08:
30709 /* VROUNDPS imm8, xmm2/m128, xmm1 */
30710       /* VROUNDPS = VEX.128.66.0F3A.WIG 08 /r ib */
30711 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30712 UChar modrm = getUChar(delta);
30713 UInt rG = gregOfRexRM(pfx, modrm);
30714 IRTemp src = newTemp(Ity_V128);
30715 IRTemp s0 = IRTemp_INVALID;
30716 IRTemp s1 = IRTemp_INVALID;
30717 IRTemp s2 = IRTemp_INVALID;
30718 IRTemp s3 = IRTemp_INVALID;
30719 IRTemp rm = newTemp(Ity_I32);
30720 Int imm = 0;
30722 modrm = getUChar(delta);
30724 if (epartIsReg(modrm)) {
30725 UInt rE = eregOfRexRM(pfx, modrm);
30726 assign( src, getXMMReg( rE ) );
30727 imm = getUChar(delta+1);
30728 if (imm & ~15) break;
30729 delta += 1+1;
30730 DIP( "vroundps $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) );
30731 } else {
30732 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30733 assign( src, loadLE(Ity_V128, mkexpr(addr) ) );
30734 imm = getUChar(delta+alen);
30735 if (imm & ~15) break;
30736 delta += alen+1;
30737 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) );
30740 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30741 that encoding is the same as the encoding for IRRoundingMode,
30742 we can use that value directly in the IR as a rounding
30743 mode. */
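         /* Added note: concretely, imm[1:0] = 0,1,2,3 requests round to
            nearest-even, toward -inf, toward +inf and toward zero
            respectively, matching Irrm_NEAREST/NegINF/PosINF/ZERO, while a
            set imm bit 2 means "use the rounding mode currently in MXCSR"
            -- hence the get_sse_roundingmode() arm below. */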
30744 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30746 breakupV128to32s( src, &s3, &s2, &s1, &s0 );
30747 putYMMRegLane128( rG, 1, mkV128(0) );
30748 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
30749 unop(Iop_ReinterpI32asF32, mkexpr(s)))
30750 putYMMRegLane32F( rG, 3, CVT(s3) );
30751 putYMMRegLane32F( rG, 2, CVT(s2) );
30752 putYMMRegLane32F( rG, 1, CVT(s1) );
30753 putYMMRegLane32F( rG, 0, CVT(s0) );
30754 # undef CVT
30755 goto decode_success;
30757 /* VROUNDPS imm8, ymm2/m256, ymm1 */
30758       /* VROUNDPS = VEX.256.66.0F3A.WIG 08 /r ib */
30759 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30760 UChar modrm = getUChar(delta);
30761 UInt rG = gregOfRexRM(pfx, modrm);
30762 IRTemp src = newTemp(Ity_V256);
30763 IRTemp s0 = IRTemp_INVALID;
30764 IRTemp s1 = IRTemp_INVALID;
30765 IRTemp s2 = IRTemp_INVALID;
30766 IRTemp s3 = IRTemp_INVALID;
30767 IRTemp s4 = IRTemp_INVALID;
30768 IRTemp s5 = IRTemp_INVALID;
30769 IRTemp s6 = IRTemp_INVALID;
30770 IRTemp s7 = IRTemp_INVALID;
30771 IRTemp rm = newTemp(Ity_I32);
30772 Int imm = 0;
30774 modrm = getUChar(delta);
30776 if (epartIsReg(modrm)) {
30777 UInt rE = eregOfRexRM(pfx, modrm);
30778 assign( src, getYMMReg( rE ) );
30779 imm = getUChar(delta+1);
30780 if (imm & ~15) break;
30781 delta += 1+1;
30782 DIP( "vroundps $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) );
30783 } else {
30784 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30785 assign( src, loadLE(Ity_V256, mkexpr(addr) ) );
30786 imm = getUChar(delta+alen);
30787 if (imm & ~15) break;
30788 delta += alen+1;
30789 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) );
30792 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30793 that encoding is the same as the encoding for IRRoundingMode,
30794 we can use that value directly in the IR as a rounding
30795 mode. */
30796 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30798 breakupV256to32s( src, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
30799 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
30800 unop(Iop_ReinterpI32asF32, mkexpr(s)))
30801 putYMMRegLane32F( rG, 7, CVT(s7) );
30802 putYMMRegLane32F( rG, 6, CVT(s6) );
30803 putYMMRegLane32F( rG, 5, CVT(s5) );
30804 putYMMRegLane32F( rG, 4, CVT(s4) );
30805 putYMMRegLane32F( rG, 3, CVT(s3) );
30806 putYMMRegLane32F( rG, 2, CVT(s2) );
30807 putYMMRegLane32F( rG, 1, CVT(s1) );
30808 putYMMRegLane32F( rG, 0, CVT(s0) );
30809 # undef CVT
30810 goto decode_success;
30813 case 0x09:
30814 /* VROUNDPD imm8, xmm2/m128, xmm1 */
30815       /* VROUNDPD = VEX.128.66.0F3A.WIG 09 /r ib */
30816 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30817 UChar modrm = getUChar(delta);
30818 UInt rG = gregOfRexRM(pfx, modrm);
30819 IRTemp src = newTemp(Ity_V128);
30820 IRTemp s0 = IRTemp_INVALID;
30821 IRTemp s1 = IRTemp_INVALID;
30822 IRTemp rm = newTemp(Ity_I32);
30823 Int imm = 0;
30825 modrm = getUChar(delta);
30827 if (epartIsReg(modrm)) {
30828 UInt rE = eregOfRexRM(pfx, modrm);
30829 assign( src, getXMMReg( rE ) );
30830 imm = getUChar(delta+1);
30831 if (imm & ~15) break;
30832 delta += 1+1;
30833 DIP( "vroundpd $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) );
30834 } else {
30835 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30836 assign( src, loadLE(Ity_V128, mkexpr(addr) ) );
30837 imm = getUChar(delta+alen);
30838 if (imm & ~15) break;
30839 delta += alen+1;
30840 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) );
30843 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30844 that encoding is the same as the encoding for IRRoundingMode,
30845 we can use that value directly in the IR as a rounding
30846 mode. */
30847 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30849 breakupV128to64s( src, &s1, &s0 );
30850 putYMMRegLane128( rG, 1, mkV128(0) );
30851 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
30852 unop(Iop_ReinterpI64asF64, mkexpr(s)))
30853 putYMMRegLane64F( rG, 1, CVT(s1) );
30854 putYMMRegLane64F( rG, 0, CVT(s0) );
30855 # undef CVT
30856 goto decode_success;
30858 /* VROUNDPD imm8, ymm2/m256, ymm1 */
30859       /* VROUNDPD = VEX.256.66.0F3A.WIG 09 /r ib */
30860 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30861 UChar modrm = getUChar(delta);
30862 UInt rG = gregOfRexRM(pfx, modrm);
30863 IRTemp src = newTemp(Ity_V256);
30864 IRTemp s0 = IRTemp_INVALID;
30865 IRTemp s1 = IRTemp_INVALID;
30866 IRTemp s2 = IRTemp_INVALID;
30867 IRTemp s3 = IRTemp_INVALID;
30868 IRTemp rm = newTemp(Ity_I32);
30869 Int imm = 0;
30871 modrm = getUChar(delta);
30873 if (epartIsReg(modrm)) {
30874 UInt rE = eregOfRexRM(pfx, modrm);
30875 assign( src, getYMMReg( rE ) );
30876 imm = getUChar(delta+1);
30877 if (imm & ~15) break;
30878 delta += 1+1;
30879 DIP( "vroundpd $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) );
30880 } else {
30881 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30882 assign( src, loadLE(Ity_V256, mkexpr(addr) ) );
30883 imm = getUChar(delta+alen);
30884 if (imm & ~15) break;
30885 delta += alen+1;
30886             DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) );
30889 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30890 that encoding is the same as the encoding for IRRoundingMode,
30891 we can use that value directly in the IR as a rounding
30892 mode. */
30893 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30895 breakupV256to64s( src, &s3, &s2, &s1, &s0 );
30896 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
30897 unop(Iop_ReinterpI64asF64, mkexpr(s)))
30898 putYMMRegLane64F( rG, 3, CVT(s3) );
30899 putYMMRegLane64F( rG, 2, CVT(s2) );
30900 putYMMRegLane64F( rG, 1, CVT(s1) );
30901 putYMMRegLane64F( rG, 0, CVT(s0) );
30902 # undef CVT
30903 goto decode_success;
30906 case 0x0A:
30907 case 0x0B:
30908 /* VROUNDSS imm8, xmm3/m32, xmm2, xmm1 */
30909 /* VROUNDSS = VEX.NDS.128.66.0F3A.WIG 0A ib */
30910 /* VROUNDSD imm8, xmm3/m64, xmm2, xmm1 */
30911 /* VROUNDSD = VEX.NDS.128.66.0F3A.WIG 0B ib */
30912 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30913 UChar modrm = getUChar(delta);
30914 UInt rG = gregOfRexRM(pfx, modrm);
30915 UInt rV = getVexNvvvv(pfx);
30916 Bool isD = opc == 0x0B;
30917 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
30918 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
30919 Int imm = 0;
30921 if (epartIsReg(modrm)) {
30922 UInt rE = eregOfRexRM(pfx, modrm);
30923 assign( src,
30924 isD ? getXMMRegLane64F(rE, 0) : getXMMRegLane32F(rE, 0) );
30925 imm = getUChar(delta+1);
30926 if (imm & ~15) break;
30927 delta += 1+1;
30928 DIP( "vrounds%c $%d,%s,%s,%s\n",
30929 isD ? 'd' : 's',
30930 imm, nameXMMReg( rE ), nameXMMReg( rV ), nameXMMReg( rG ) );
30931 } else {
30932 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30933 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
30934 imm = getUChar(delta+alen);
30935 if (imm & ~15) break;
30936 delta += alen+1;
30937 DIP( "vrounds%c $%d,%s,%s,%s\n",
30938 isD ? 'd' : 's',
30939 imm, dis_buf, nameXMMReg( rV ), nameXMMReg( rG ) );
30942 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30943 that encoding is the same as the encoding for IRRoundingMode,
30944 we can use that value directly in the IR as a rounding
30945 mode. */
30946 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
30947 (imm & 4) ? get_sse_roundingmode()
30948 : mkU32(imm & 3),
30949 mkexpr(src)) );
30951 if (isD)
30952 putXMMRegLane64F( rG, 0, mkexpr(res) );
30953 else {
30954 putXMMRegLane32F( rG, 0, mkexpr(res) );
30955 putXMMRegLane32F( rG, 1, getXMMRegLane32F( rV, 1 ) );
30957 putXMMRegLane64F( rG, 1, getXMMRegLane64F( rV, 1 ) );
30958 putYMMRegLane128( rG, 1, mkV128(0) );
30959 *uses_vvvv = True;
30960 goto decode_success;
30962 break;
30964 case 0x0C:
30965 /* VBLENDPS imm8, ymm3/m256, ymm2, ymm1 */
30966 /* VBLENDPS = VEX.NDS.256.66.0F3A.WIG 0C /r ib */
30967 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30968 UChar modrm = getUChar(delta);
30969 UInt imm8;
30970 UInt rG = gregOfRexRM(pfx, modrm);
30971 UInt rV = getVexNvvvv(pfx);
30972 IRTemp sV = newTemp(Ity_V256);
30973 IRTemp sE = newTemp(Ity_V256);
30974 assign ( sV, getYMMReg(rV) );
30975 if (epartIsReg(modrm)) {
30976 UInt rE = eregOfRexRM(pfx, modrm);
30977 delta += 1;
30978 imm8 = getUChar(delta);
30979 DIP("vblendps $%u,%s,%s,%s\n",
30980 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30981 assign(sE, getYMMReg(rE));
30982 } else {
30983 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30984 delta += alen;
30985 imm8 = getUChar(delta);
30986 DIP("vblendps $%u,%s,%s,%s\n",
30987 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30988 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
30990 delta++;
30991 putYMMReg( rG,
30992 mkexpr( math_BLENDPS_256( sE, sV, imm8) ) );
30993 *uses_vvvv = True;
30994 goto decode_success;
30996 /* VBLENDPS imm8, xmm3/m128, xmm2, xmm1 */
30997 /* VBLENDPS = VEX.NDS.128.66.0F3A.WIG 0C /r ib */
30998 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30999 UChar modrm = getUChar(delta);
31000 UInt imm8;
31001 UInt rG = gregOfRexRM(pfx, modrm);
31002 UInt rV = getVexNvvvv(pfx);
31003 IRTemp sV = newTemp(Ity_V128);
31004 IRTemp sE = newTemp(Ity_V128);
31005 assign ( sV, getXMMReg(rV) );
31006 if (epartIsReg(modrm)) {
31007 UInt rE = eregOfRexRM(pfx, modrm);
31008 delta += 1;
31009 imm8 = getUChar(delta);
31010 DIP("vblendps $%u,%s,%s,%s\n",
31011 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
31012 assign(sE, getXMMReg(rE));
31013 } else {
31014 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31015 delta += alen;
31016 imm8 = getUChar(delta);
31017 DIP("vblendps $%u,%s,%s,%s\n",
31018 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
31019 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
31021 delta++;
31022 putYMMRegLoAndZU( rG,
31023 mkexpr( math_BLENDPS_128( sE, sV, imm8) ) );
31024 *uses_vvvv = True;
31025 goto decode_success;
31027 break;
31029 case 0x0D:
31030 /* VBLENDPD imm8, ymm3/m256, ymm2, ymm1 */
31031 /* VBLENDPD = VEX.NDS.256.66.0F3A.WIG 0D /r ib */
31032 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31033 UChar modrm = getUChar(delta);
31034 UInt imm8;
31035 UInt rG = gregOfRexRM(pfx, modrm);
31036 UInt rV = getVexNvvvv(pfx);
31037 IRTemp sV = newTemp(Ity_V256);
31038 IRTemp sE = newTemp(Ity_V256);
31039 assign ( sV, getYMMReg(rV) );
31040 if (epartIsReg(modrm)) {
31041 UInt rE = eregOfRexRM(pfx, modrm);
31042 delta += 1;
31043 imm8 = getUChar(delta);
31044 DIP("vblendpd $%u,%s,%s,%s\n",
31045 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31046 assign(sE, getYMMReg(rE));
31047 } else {
31048 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31049 delta += alen;
31050 imm8 = getUChar(delta);
31051 DIP("vblendpd $%u,%s,%s,%s\n",
31052 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31053 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
31055 delta++;
31056 putYMMReg( rG,
31057 mkexpr( math_BLENDPD_256( sE, sV, imm8) ) );
31058 *uses_vvvv = True;
31059 goto decode_success;
31061 /* VBLENDPD imm8, xmm3/m128, xmm2, xmm1 */
31062 /* VBLENDPD = VEX.NDS.128.66.0F3A.WIG 0D /r ib */
31063 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31064 UChar modrm = getUChar(delta);
31065 UInt imm8;
31066 UInt rG = gregOfRexRM(pfx, modrm);
31067 UInt rV = getVexNvvvv(pfx);
31068 IRTemp sV = newTemp(Ity_V128);
31069 IRTemp sE = newTemp(Ity_V128);
31070 assign ( sV, getXMMReg(rV) );
31071 if (epartIsReg(modrm)) {
31072 UInt rE = eregOfRexRM(pfx, modrm);
31073 delta += 1;
31074 imm8 = getUChar(delta);
31075 DIP("vblendpd $%u,%s,%s,%s\n",
31076 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
31077 assign(sE, getXMMReg(rE));
31078 } else {
31079 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31080 delta += alen;
31081 imm8 = getUChar(delta);
31082 DIP("vblendpd $%u,%s,%s,%s\n",
31083 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
31084 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
31086 delta++;
31087 putYMMRegLoAndZU( rG,
31088 mkexpr( math_BLENDPD_128( sE, sV, imm8) ) );
31089 *uses_vvvv = True;
31090 goto decode_success;
31092 break;
31094 case 0x0E:
31095 /* VPBLENDW imm8, xmm3/m128, xmm2, xmm1 */
31096 /* VPBLENDW = VEX.NDS.128.66.0F3A.WIG 0E /r ib */
31097 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31098 UChar modrm = getUChar(delta);
31099 UInt imm8;
31100 UInt rG = gregOfRexRM(pfx, modrm);
31101 UInt rV = getVexNvvvv(pfx);
31102 IRTemp sV = newTemp(Ity_V128);
31103 IRTemp sE = newTemp(Ity_V128);
31104 assign ( sV, getXMMReg(rV) );
31105 if (epartIsReg(modrm)) {
31106 UInt rE = eregOfRexRM(pfx, modrm);
31107 delta += 1;
31108 imm8 = getUChar(delta);
31109 DIP("vpblendw $%u,%s,%s,%s\n",
31110 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
31111 assign(sE, getXMMReg(rE));
31112 } else {
31113 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31114 delta += alen;
31115 imm8 = getUChar(delta);
31116 DIP("vpblendw $%u,%s,%s,%s\n",
31117 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
31118 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
31120 delta++;
31121 putYMMRegLoAndZU( rG,
31122 mkexpr( math_PBLENDW_128( sE, sV, imm8) ) );
31123 *uses_vvvv = True;
31124 goto decode_success;
31126 /* VPBLENDW imm8, ymm3/m256, ymm2, ymm1 */
31127 /* VPBLENDW = VEX.NDS.256.66.0F3A.WIG 0E /r ib */
31128 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31129 UChar modrm = getUChar(delta);
31130 UInt imm8;
31131 UInt rG = gregOfRexRM(pfx, modrm);
31132 UInt rV = getVexNvvvv(pfx);
31133 IRTemp sV = newTemp(Ity_V256);
31134 IRTemp sE = newTemp(Ity_V256);
31135 IRTemp sVhi, sVlo, sEhi, sElo;
31136 sVhi = sVlo = sEhi = sElo = IRTemp_INVALID;
31137 assign ( sV, getYMMReg(rV) );
31138 if (epartIsReg(modrm)) {
31139 UInt rE = eregOfRexRM(pfx, modrm);
31140 delta += 1;
31141 imm8 = getUChar(delta);
31142 DIP("vpblendw $%u,%s,%s,%s\n",
31143 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31144 assign(sE, getYMMReg(rE));
31145 } else {
31146 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31147 delta += alen;
31148 imm8 = getUChar(delta);
31149 DIP("vpblendw $%u,%s,%s,%s\n",
31150 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31151 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
31153 delta++;
31154 breakupV256toV128s( sV, &sVhi, &sVlo );
31155 breakupV256toV128s( sE, &sEhi, &sElo );
31156 putYMMReg( rG, binop( Iop_V128HLtoV256,
31157 mkexpr( math_PBLENDW_128( sEhi, sVhi, imm8) ),
31158 mkexpr( math_PBLENDW_128( sElo, sVlo, imm8) ) ) );
31159 *uses_vvvv = True;
31160 goto decode_success;
31162 break;
31164 case 0x0F:
31165 /* VPALIGNR imm8, xmm3/m128, xmm2, xmm1 */
31166 /* VPALIGNR = VEX.NDS.128.66.0F3A.WIG 0F /r ib */
31167 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31168 UChar modrm = getUChar(delta);
31169 UInt rG = gregOfRexRM(pfx, modrm);
31170 UInt rV = getVexNvvvv(pfx);
31171 IRTemp sV = newTemp(Ity_V128);
31172 IRTemp dV = newTemp(Ity_V128);
31173 UInt imm8;
31175 assign( dV, getXMMReg(rV) );
31177 if ( epartIsReg( modrm ) ) {
31178 UInt rE = eregOfRexRM(pfx, modrm);
31179 assign( sV, getXMMReg(rE) );
31180 imm8 = getUChar(delta+1);
31181 delta += 1+1;
31182 DIP("vpalignr $%u,%s,%s,%s\n", imm8, nameXMMReg(rE),
31183 nameXMMReg(rV), nameXMMReg(rG));
31184 } else {
31185 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31186 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
31187 imm8 = getUChar(delta+alen);
31188 delta += alen+1;
31189 DIP("vpalignr $%u,%s,%s,%s\n", imm8, dis_buf,
31190 nameXMMReg(rV), nameXMMReg(rG));
31193 IRTemp res = math_PALIGNR_XMM( sV, dV, imm8 );
31194 putYMMRegLoAndZU( rG, mkexpr(res) );
31195 *uses_vvvv = True;
31196 goto decode_success;
31198 /* VPALIGNR imm8, ymm3/m256, ymm2, ymm1 */
31199 /* VPALIGNR = VEX.NDS.256.66.0F3A.WIG 0F /r ib */
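      /* Added note: the 256-bit form is not a single 32-byte shift; as in
         the hardware, each 128-bit lane is aligned independently, which is
         why the sources are split with breakupV256toV128s and recombined
         below. */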
31200 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31201 UChar modrm = getUChar(delta);
31202 UInt rG = gregOfRexRM(pfx, modrm);
31203 UInt rV = getVexNvvvv(pfx);
31204 IRTemp sV = newTemp(Ity_V256);
31205 IRTemp dV = newTemp(Ity_V256);
31206 IRTemp sHi, sLo, dHi, dLo;
31207 sHi = sLo = dHi = dLo = IRTemp_INVALID;
31208 UInt imm8;
31210 assign( dV, getYMMReg(rV) );
31212 if ( epartIsReg( modrm ) ) {
31213 UInt rE = eregOfRexRM(pfx, modrm);
31214 assign( sV, getYMMReg(rE) );
31215 imm8 = getUChar(delta+1);
31216 delta += 1+1;
31217 DIP("vpalignr $%u,%s,%s,%s\n", imm8, nameYMMReg(rE),
31218 nameYMMReg(rV), nameYMMReg(rG));
31219 } else {
31220 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31221 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
31222 imm8 = getUChar(delta+alen);
31223 delta += alen+1;
31224 DIP("vpalignr $%u,%s,%s,%s\n", imm8, dis_buf,
31225 nameYMMReg(rV), nameYMMReg(rG));
31228 breakupV256toV128s( dV, &dHi, &dLo );
31229 breakupV256toV128s( sV, &sHi, &sLo );
31230 putYMMReg( rG, binop( Iop_V128HLtoV256,
31231 mkexpr( math_PALIGNR_XMM( sHi, dHi, imm8 ) ),
31232 mkexpr( math_PALIGNR_XMM( sLo, dLo, imm8 ) ) )
31234 *uses_vvvv = True;
31235 goto decode_success;
31237 break;
31239 case 0x14:
31240 /* VPEXTRB imm8, xmm2, reg/m8 = VEX.128.66.0F3A.W0 14 /r ib */
31241 if (have66noF2noF3(pfx)
31242 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31243 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
31244 goto decode_success;
31246 break;
31248 case 0x15:
31249 /* VPEXTRW imm8, reg/m16, xmm2 */
31250 /* VPEXTRW = VEX.128.66.0F3A.W0 15 /r ib */
31251 if (have66noF2noF3(pfx)
31252 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31253 delta = dis_PEXTRW( vbi, pfx, delta, True/*isAvx*/ );
31254 goto decode_success;
31256 break;
31258 case 0x16:
31259 /* VPEXTRD imm8, r32/m32, xmm2 */
31260 /* VPEXTRD = VEX.128.66.0F3A.W0 16 /r ib */
31261 if (have66noF2noF3(pfx)
31262 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31263 delta = dis_PEXTRD( vbi, pfx, delta, True/*isAvx*/ );
31264 goto decode_success;
31266 /* VPEXTRQ = VEX.128.66.0F3A.W1 16 /r ib */
31267 if (have66noF2noF3(pfx)
31268 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
31269 delta = dis_PEXTRQ( vbi, pfx, delta, True/*isAvx*/ );
31270 goto decode_success;
31272 break;
31274 case 0x17:
31275 /* VEXTRACTPS imm8, xmm1, r32/m32 = VEX.128.66.0F3A.WIG 17 /r ib */
31276 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31277 delta = dis_EXTRACTPS( vbi, pfx, delta, True/*isAvx*/ );
31278 goto decode_success;
31280 break;
31282 case 0x18:
31283 /* VINSERTF128 r/m, rV, rD
31284 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
31285 /* VINSERTF128 = VEX.NDS.256.66.0F3A.W0 18 /r ib */
31286 if (have66noF2noF3(pfx)
31287 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31288 UChar modrm = getUChar(delta);
31289 UInt ib = 0;
31290 UInt rG = gregOfRexRM(pfx, modrm);
31291 UInt rV = getVexNvvvv(pfx);
31292 IRTemp t128 = newTemp(Ity_V128);
31293 if (epartIsReg(modrm)) {
31294 UInt rE = eregOfRexRM(pfx, modrm);
31295 delta += 1;
31296 assign(t128, getXMMReg(rE));
31297 ib = getUChar(delta);
31298 DIP("vinsertf128 $%u,%s,%s,%s\n",
31299 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31300 } else {
31301 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31302 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
31303 delta += alen;
31304 ib = getUChar(delta);
31305 DIP("vinsertf128 $%u,%s,%s,%s\n",
31306 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31308 delta++;
31309 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0));
31310 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1));
31311 putYMMRegLane128(rG, ib & 1, mkexpr(t128));
31312 *uses_vvvv = True;
31313 goto decode_success;
31315 break;
31317 case 0x19:
31318 /* VEXTRACTF128 $lane_no, rS, r/m
31319 ::: r/m:V128 = a lane of rS:V256 (RM format) */
31320 /* VEXTRACTF128 = VEX.256.66.0F3A.W0 19 /r ib */
31321 if (have66noF2noF3(pfx)
31322 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31323 UChar modrm = getUChar(delta);
31324 UInt ib = 0;
31325 UInt rS = gregOfRexRM(pfx, modrm);
31326 IRTemp t128 = newTemp(Ity_V128);
31327 if (epartIsReg(modrm)) {
31328 UInt rD = eregOfRexRM(pfx, modrm);
31329 delta += 1;
31330 ib = getUChar(delta);
31331 assign(t128, getYMMRegLane128(rS, ib & 1));
31332 putYMMRegLoAndZU(rD, mkexpr(t128));
31333 DIP("vextractf128 $%u,%s,%s\n",
31334 ib, nameXMMReg(rS), nameYMMReg(rD));
31335 } else {
31336 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31337 delta += alen;
31338 ib = getUChar(delta);
31339 assign(t128, getYMMRegLane128(rS, ib & 1));
31340 storeLE(mkexpr(addr), mkexpr(t128));
31341 DIP("vextractf128 $%u,%s,%s\n",
31342 ib, nameYMMReg(rS), dis_buf);
31344 delta++;
31345 /* doesn't use vvvv */
31346 goto decode_success;
31348 break;
31350 case 0x20:
31351 /* VPINSRB r32/m8, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 20 /r ib */
31352 if (have66noF2noF3(pfx)
31353 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31354 UChar modrm = getUChar(delta);
31355 UInt rG = gregOfRexRM(pfx, modrm);
31356 UInt rV = getVexNvvvv(pfx);
31357 Int imm8;
31358 IRTemp src_u8 = newTemp(Ity_I8);
31360 if ( epartIsReg( modrm ) ) {
31361 UInt rE = eregOfRexRM(pfx,modrm);
31362 imm8 = (Int)(getUChar(delta+1) & 15);
31363 assign( src_u8, unop(Iop_32to8, getIReg32( rE )) );
31364 delta += 1+1;
31365 DIP( "vpinsrb $%d,%s,%s,%s\n",
31366 imm8, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
31367 } else {
31368 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31369 imm8 = (Int)(getUChar(delta+alen) & 15);
31370 assign( src_u8, loadLE( Ity_I8, mkexpr(addr) ) );
31371 delta += alen+1;
31372 DIP( "vpinsrb $%d,%s,%s,%s\n",
31373 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31376 IRTemp src_vec = newTemp(Ity_V128);
31377 assign(src_vec, getXMMReg( rV ));
31378 IRTemp res_vec = math_PINSRB_128( src_vec, src_u8, imm8 );
31379 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31380 *uses_vvvv = True;
31381 goto decode_success;
31383 break;
31385 case 0x21:
31386 /* VINSERTPS imm8, xmm3/m32, xmm2, xmm1
31387 = VEX.NDS.128.66.0F3A.WIG 21 /r ib */
31388 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31389 UChar modrm = getUChar(delta);
31390 UInt rG = gregOfRexRM(pfx, modrm);
31391 UInt rV = getVexNvvvv(pfx);
31392 UInt imm8;
31393 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */
31394 const IRTemp inval = IRTemp_INVALID;
31396 if ( epartIsReg( modrm ) ) {
31397 UInt rE = eregOfRexRM(pfx, modrm);
31398 IRTemp vE = newTemp(Ity_V128);
31399 assign( vE, getXMMReg(rE) );
31400 IRTemp dsE[4] = { inval, inval, inval, inval };
31401 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] );
31402 imm8 = getUChar(delta+1);
31403 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */
31404 delta += 1+1;
31405             DIP( "vinsertps $%u,%s,%s,%s\n",
31406                  imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31407 } else {
31408 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31409 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) );
31410 imm8 = getUChar(delta+alen);
31411 delta += alen+1;
31412             DIP( "vinsertps $%u,%s,%s,%s\n",
31413                  imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31416 IRTemp vV = newTemp(Ity_V128);
31417 assign( vV, getXMMReg(rV) );
31419 putYMMRegLoAndZU( rG, mkexpr(math_INSERTPS( vV, d2ins, imm8 )) );
31420 *uses_vvvv = True;
31421 goto decode_success;
31423 break;
31425 case 0x22:
31426 /* VPINSRD r32/m32, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 22 /r ib */
31427 if (have66noF2noF3(pfx)
31428 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31429 UChar modrm = getUChar(delta);
31430 UInt rG = gregOfRexRM(pfx, modrm);
31431 UInt rV = getVexNvvvv(pfx);
31432 Int imm8_10;
31433 IRTemp src_u32 = newTemp(Ity_I32);
31435 if ( epartIsReg( modrm ) ) {
31436 UInt rE = eregOfRexRM(pfx,modrm);
31437 imm8_10 = (Int)(getUChar(delta+1) & 3);
31438 assign( src_u32, getIReg32( rE ) );
31439 delta += 1+1;
31440 DIP( "vpinsrd $%d,%s,%s,%s\n",
31441 imm8_10, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
31442 } else {
31443 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31444 imm8_10 = (Int)(getUChar(delta+alen) & 3);
31445 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) );
31446 delta += alen+1;
31447 DIP( "vpinsrd $%d,%s,%s,%s\n",
31448 imm8_10, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31451 IRTemp src_vec = newTemp(Ity_V128);
31452 assign(src_vec, getXMMReg( rV ));
31453 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 );
31454 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31455 *uses_vvvv = True;
31456 goto decode_success;
31458 /* VPINSRQ r64/m64, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W1 22 /r ib */
31459 if (have66noF2noF3(pfx)
31460 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
31461 UChar modrm = getUChar(delta);
31462 UInt rG = gregOfRexRM(pfx, modrm);
31463 UInt rV = getVexNvvvv(pfx);
31464 Int imm8_0;
31465 IRTemp src_u64 = newTemp(Ity_I64);
31467 if ( epartIsReg( modrm ) ) {
31468 UInt rE = eregOfRexRM(pfx,modrm);
31469 imm8_0 = (Int)(getUChar(delta+1) & 1);
31470 assign( src_u64, getIReg64( rE ) );
31471 delta += 1+1;
31472 DIP( "vpinsrq $%d,%s,%s,%s\n",
31473 imm8_0, nameIReg64(rE), nameXMMReg(rV), nameXMMReg(rG) );
31474 } else {
31475 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31476 imm8_0 = (Int)(getUChar(delta+alen) & 1);
31477 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) );
31478 delta += alen+1;
31479             DIP( "vpinsrq $%d,%s,%s,%s\n",
31480 imm8_0, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31483 IRTemp src_vec = newTemp(Ity_V128);
31484 assign(src_vec, getXMMReg( rV ));
31485 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 );
31486 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31487 *uses_vvvv = True;
31488 goto decode_success;
31490 break;
31492 case 0x38:
31493 /* VINSERTI128 r/m, rV, rD
31494 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
31495 /* VINSERTI128 = VEX.NDS.256.66.0F3A.W0 38 /r ib */
31496 if (have66noF2noF3(pfx)
31497 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31498 UChar modrm = getUChar(delta);
31499 UInt ib = 0;
31500 UInt rG = gregOfRexRM(pfx, modrm);
31501 UInt rV = getVexNvvvv(pfx);
31502 IRTemp t128 = newTemp(Ity_V128);
31503 if (epartIsReg(modrm)) {
31504 UInt rE = eregOfRexRM(pfx, modrm);
31505 delta += 1;
31506 assign(t128, getXMMReg(rE));
31507 ib = getUChar(delta);
31508 DIP("vinserti128 $%u,%s,%s,%s\n",
31509 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31510 } else {
31511 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31512 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
31513 delta += alen;
31514 ib = getUChar(delta);
31515 DIP("vinserti128 $%u,%s,%s,%s\n",
31516 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31518 delta++;
31519 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0));
31520 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1));
31521 putYMMRegLane128(rG, ib & 1, mkexpr(t128));
31522 *uses_vvvv = True;
31523 goto decode_success;
31525 break;
31527 case 0x39:
31528 /* VEXTRACTI128 $lane_no, rS, r/m
31529 ::: r/m:V128 = a lane of rS:V256 (RM format) */
31530 /* VEXTRACTI128 = VEX.256.66.0F3A.W0 39 /r ib */
31531 if (have66noF2noF3(pfx)
31532 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31533 UChar modrm = getUChar(delta);
31534 UInt ib = 0;
31535 UInt rS = gregOfRexRM(pfx, modrm);
31536 IRTemp t128 = newTemp(Ity_V128);
31537 if (epartIsReg(modrm)) {
31538 UInt rD = eregOfRexRM(pfx, modrm);
31539 delta += 1;
31540 ib = getUChar(delta);
31541 assign(t128, getYMMRegLane128(rS, ib & 1));
31542 putYMMRegLoAndZU(rD, mkexpr(t128));
31543 DIP("vextracti128 $%u,%s,%s\n",
31544 ib, nameXMMReg(rS), nameYMMReg(rD));
31545 } else {
31546 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31547 delta += alen;
31548 ib = getUChar(delta);
31549 assign(t128, getYMMRegLane128(rS, ib & 1));
31550 storeLE(mkexpr(addr), mkexpr(t128));
31551 DIP("vextracti128 $%u,%s,%s\n",
31552 ib, nameYMMReg(rS), dis_buf);
31554 delta++;
31555 /* doesn't use vvvv */
31556 goto decode_success;
31558 break;
31560 case 0x40:
31561 /* VDPPS imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 40 /r ib */
31562 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31563 UChar modrm = getUChar(delta);
31564 UInt rG = gregOfRexRM(pfx, modrm);
31565 UInt rV = getVexNvvvv(pfx);
31566 IRTemp dst_vec = newTemp(Ity_V128);
31567 Int imm8;
31568 if (epartIsReg( modrm )) {
31569 UInt rE = eregOfRexRM(pfx,modrm);
31570 imm8 = (Int)getUChar(delta+1);
31571 assign( dst_vec, getXMMReg( rE ) );
31572 delta += 1+1;
31573 DIP( "vdpps $%d,%s,%s,%s\n",
31574 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31575 } else {
31576 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31577 imm8 = (Int)getUChar(delta+alen);
31578 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
31579 delta += alen+1;
31580 DIP( "vdpps $%d,%s,%s,%s\n",
31581 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31584 IRTemp src_vec = newTemp(Ity_V128);
31585 assign(src_vec, getXMMReg( rV ));
31586 IRTemp res_vec = math_DPPS_128( src_vec, dst_vec, imm8 );
31587 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31588 *uses_vvvv = True;
31589 goto decode_success;
31591       /* VDPPS imm8, ymm3/m256,ymm2,ymm1 = VEX.NDS.256.66.0F3A.WIG 40 /r ib */
31592 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31593 UChar modrm = getUChar(delta);
31594 UInt rG = gregOfRexRM(pfx, modrm);
31595 UInt rV = getVexNvvvv(pfx);
31596 IRTemp dst_vec = newTemp(Ity_V256);
31597 Int imm8;
31598 if (epartIsReg( modrm )) {
31599 UInt rE = eregOfRexRM(pfx,modrm);
31600 imm8 = (Int)getUChar(delta+1);
31601 assign( dst_vec, getYMMReg( rE ) );
31602 delta += 1+1;
31603 DIP( "vdpps $%d,%s,%s,%s\n",
31604 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
31605 } else {
31606 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31607 imm8 = (Int)getUChar(delta+alen);
31608 assign( dst_vec, loadLE( Ity_V256, mkexpr(addr) ) );
31609 delta += alen+1;
31610 DIP( "vdpps $%d,%s,%s,%s\n",
31611 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
31614 IRTemp src_vec = newTemp(Ity_V256);
31615 assign(src_vec, getYMMReg( rV ));
31616 IRTemp s0, s1, d0, d1;
31617 s0 = s1 = d0 = d1 = IRTemp_INVALID;
31618 breakupV256toV128s( dst_vec, &d1, &d0 );
31619 breakupV256toV128s( src_vec, &s1, &s0 );
31620 putYMMReg( rG, binop( Iop_V128HLtoV256,
31621 mkexpr( math_DPPS_128(s1, d1, imm8) ),
31622 mkexpr( math_DPPS_128(s0, d0, imm8) ) ) );
31623 *uses_vvvv = True;
31624 goto decode_success;
31626 break;
31628 case 0x41:
31629 /* VDPPD imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 41 /r ib */
31630 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31631 UChar modrm = getUChar(delta);
31632 UInt rG = gregOfRexRM(pfx, modrm);
31633 UInt rV = getVexNvvvv(pfx);
31634 IRTemp dst_vec = newTemp(Ity_V128);
31635 Int imm8;
31636 if (epartIsReg( modrm )) {
31637 UInt rE = eregOfRexRM(pfx,modrm);
31638 imm8 = (Int)getUChar(delta+1);
31639 assign( dst_vec, getXMMReg( rE ) );
31640 delta += 1+1;
31641 DIP( "vdppd $%d,%s,%s,%s\n",
31642 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31643 } else {
31644 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31645 imm8 = (Int)getUChar(delta+alen);
31646 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
31647 delta += alen+1;
31648 DIP( "vdppd $%d,%s,%s,%s\n",
31649 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31652 IRTemp src_vec = newTemp(Ity_V128);
31653 assign(src_vec, getXMMReg( rV ));
31654 IRTemp res_vec = math_DPPD_128( src_vec, dst_vec, imm8 );
31655 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31656 *uses_vvvv = True;
31657 goto decode_success;
31659 break;
31661 case 0x42:
31662 /* VMPSADBW imm8, xmm3/m128,xmm2,xmm1 */
31663 /* VMPSADBW = VEX.NDS.128.66.0F3A.WIG 42 /r ib */
31664 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31665 UChar modrm = getUChar(delta);
31666 Int imm8;
31667 IRTemp src_vec = newTemp(Ity_V128);
31668 IRTemp dst_vec = newTemp(Ity_V128);
31669 UInt rG = gregOfRexRM(pfx, modrm);
31670 UInt rV = getVexNvvvv(pfx);
31672 assign( dst_vec, getXMMReg(rV) );
31674 if ( epartIsReg( modrm ) ) {
31675 UInt rE = eregOfRexRM(pfx, modrm);
31677 imm8 = (Int)getUChar(delta+1);
31678 assign( src_vec, getXMMReg(rE) );
31679 delta += 1+1;
31680 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31681 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31682 } else {
31683 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
31684 1/* imm8 is 1 byte after the amode */ );
31685 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
31686 imm8 = (Int)getUChar(delta+alen);
31687 delta += alen+1;
31688 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31689 dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31692 putYMMRegLoAndZU( rG, mkexpr( math_MPSADBW_128(dst_vec,
31693 src_vec, imm8) ) );
31694 *uses_vvvv = True;
31695 goto decode_success;
31697 /* VMPSADBW imm8, ymm3/m256,ymm2,ymm1 */
31698 /* VMPSADBW = VEX.NDS.256.66.0F3A.WIG 42 /r ib */
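      /* Added note: for the 256-bit form imm8 carries two independent 3-bit
         control fields, bits 2:0 for the low 128-bit lane and bits 5:3 for
         the high lane, which is why the high-lane helper call below is
         given imm8 >> 3. */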
31699 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31700 UChar modrm = getUChar(delta);
31701 Int imm8;
31702 IRTemp src_vec = newTemp(Ity_V256);
31703 IRTemp dst_vec = newTemp(Ity_V256);
31704 UInt rG = gregOfRexRM(pfx, modrm);
31705 UInt rV = getVexNvvvv(pfx);
31706 IRTemp sHi, sLo, dHi, dLo;
31707 sHi = sLo = dHi = dLo = IRTemp_INVALID;
31709 assign( dst_vec, getYMMReg(rV) );
31711 if ( epartIsReg( modrm ) ) {
31712 UInt rE = eregOfRexRM(pfx, modrm);
31714 imm8 = (Int)getUChar(delta+1);
31715 assign( src_vec, getYMMReg(rE) );
31716 delta += 1+1;
31717 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31718 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
31719 } else {
31720 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
31721 1/* imm8 is 1 byte after the amode */ );
31722 assign( src_vec, loadLE( Ity_V256, mkexpr(addr) ) );
31723 imm8 = (Int)getUChar(delta+alen);
31724 delta += alen+1;
31725 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31726 dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
31729 breakupV256toV128s( dst_vec, &dHi, &dLo );
31730 breakupV256toV128s( src_vec, &sHi, &sLo );
31731 putYMMReg( rG, binop( Iop_V128HLtoV256,
31732 mkexpr( math_MPSADBW_128(dHi, sHi, imm8 >> 3) ),
31733 mkexpr( math_MPSADBW_128(dLo, sLo, imm8) ) ) );
31734 *uses_vvvv = True;
31735 goto decode_success;
31737 break;
31739 case 0x44:
31740 /* VPCLMULQDQ imm8, xmm3/m128,xmm2,xmm1 */
31741 /* VPCLMULQDQ = VEX.NDS.128.66.0F3A.WIG 44 /r ib */
31742 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
31743 * Carry-less multiplication of selected XMM quadwords into XMM
31744       * registers (a.k.a. multiplication of polynomials over GF(2))
31745       */
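      /* Added note (illustrative only): imm8 bit 0 picks which 64-bit half
         of one source and bit 4 which half of the other enters the multiply
         (so e.g. imm8 = 0x11 multiplies the two high quadwords), and the
         product is the full 128-bit carry-less result.  Worked example over
         GF(2): 0b101 * 0b11 = 0b1111, since partial products are XORed
         rather than added. */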
31746 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31747 UChar modrm = getUChar(delta);
31748 Int imm8;
31749 IRTemp sV = newTemp(Ity_V128);
31750 IRTemp dV = newTemp(Ity_V128);
31751 UInt rG = gregOfRexRM(pfx, modrm);
31752 UInt rV = getVexNvvvv(pfx);
31754 assign( dV, getXMMReg(rV) );
31756 if ( epartIsReg( modrm ) ) {
31757 UInt rE = eregOfRexRM(pfx, modrm);
31758 imm8 = (Int)getUChar(delta+1);
31759 assign( sV, getXMMReg(rE) );
31760 delta += 1+1;
31761 DIP( "vpclmulqdq $%d, %s,%s,%s\n", imm8,
31762 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31763 } else {
31764 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
31765 1/* imm8 is 1 byte after the amode */ );
31766 assign( sV, loadLE( Ity_V128, mkexpr(addr) ) );
31767 imm8 = (Int)getUChar(delta+alen);
31768 delta += alen+1;
31769 DIP( "vpclmulqdq $%d, %s,%s,%s\n",
31770 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31773 putYMMRegLoAndZU( rG, mkexpr( math_PCLMULQDQ(dV, sV, imm8) ) );
31774 *uses_vvvv = True;
31775 goto decode_success;
31777 break;
31779 case 0x46:
31780       /* VPERM2I128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 46 /r ib */
31781 if (have66noF2noF3(pfx)
31782 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31783 UChar modrm = getUChar(delta);
31784 UInt imm8 = 0;
31785 UInt rG = gregOfRexRM(pfx, modrm);
31786 UInt rV = getVexNvvvv(pfx);
31787 IRTemp s00 = newTemp(Ity_V128);
31788 IRTemp s01 = newTemp(Ity_V128);
31789 IRTemp s10 = newTemp(Ity_V128);
31790 IRTemp s11 = newTemp(Ity_V128);
31791 assign(s00, getYMMRegLane128(rV, 0));
31792 assign(s01, getYMMRegLane128(rV, 1));
31793 if (epartIsReg(modrm)) {
31794 UInt rE = eregOfRexRM(pfx, modrm);
31795 delta += 1;
31796 imm8 = getUChar(delta);
31797 DIP("vperm2i128 $%u,%s,%s,%s\n",
31798 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31799 assign(s10, getYMMRegLane128(rE, 0));
31800 assign(s11, getYMMRegLane128(rE, 1));
31801 } else {
31802 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31803 delta += alen;
31804 imm8 = getUChar(delta);
31805 DIP("vperm2i128 $%u,%s,%s,%s\n",
31806 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31807 assign(s10, loadLE(Ity_V128, binop(Iop_Add64,
31808 mkexpr(addr), mkU64(0))));
31809 assign(s11, loadLE(Ity_V128, binop(Iop_Add64,
31810 mkexpr(addr), mkU64(16))));
31812 delta++;
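/* Per the VPERM2I128 encoding: imm8[1:0] selects which 128-bit lane
   (0/1 = the vvvv register's lanes, 2/3 = the E operand's lanes) is
   written to the low half of the destination, imm8[5:4] does the same
   for the high half, and imm8 bits 3 and 7 force the corresponding
   half to zero instead. */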
31813 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
31814 : ((_nn)==2) ? s10 : s11)
31815 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3)));
31816 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3)));
31817 # undef SEL
31818 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0));
31819 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0));
31820 *uses_vvvv = True;
31821 goto decode_success;
31823 break;
31825 case 0x4A:
31826 /* VBLENDVPS xmmG, xmmE/memE, xmmV, xmmIS4
31827 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
31828 /* VBLENDVPS = VEX.NDS.128.66.0F3A.WIG 4A /r /is4 */
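/* The lane width (4) and Iop_SarN32x4 passed to the helper are used
   to splat the top bit of each 32-bit lane of the IS4 operand into an
   all-ones/all-zeroes mask, which then selects between the two source
   operands lane by lane. */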
31829 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31830 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
31831 "vblendvps", 4, Iop_SarN32x4 );
31832 *uses_vvvv = True;
31833 goto decode_success;
31835 /* VBLENDVPS ymmG, ymmE/memE, ymmV, ymmIS4
31836 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
31837 /* VBLENDVPS = VEX.NDS.256.66.0F3A.WIG 4A /r /is4 */
31838 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31839 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
31840 "vblendvps", 4, Iop_SarN32x4 );
31841 *uses_vvvv = True;
31842 goto decode_success;
31844 break;
31846 case 0x4B:
31847 /* VBLENDVPD xmmG, xmmE/memE, xmmV, xmmIS4
31848 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
31849 /* VBLENDVPD = VEX.NDS.128.66.0F3A.WIG 4B /r /is4 */
31850 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31851 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
31852 "vblendvpd", 8, Iop_SarN64x2 );
31853 *uses_vvvv = True;
31854 goto decode_success;
31856 /* VBLENDVPD ymmG, ymmE/memE, ymmV, ymmIS4
31857 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
31858 /* VBLENDVPD = VEX.NDS.256.66.0F3A.WIG 4B /r /is4 */
31859 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31860 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
31861 "vblendvpd", 8, Iop_SarN64x2 );
31862 *uses_vvvv = True;
31863 goto decode_success;
31865 break;
31867 case 0x4C:
31868 /* VPBLENDVB xmmG, xmmE/memE, xmmV, xmmIS4
31869 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
31870 /* VPBLENDVB = VEX.NDS.128.66.0F3A.WIG 4C /r /is4 */
31871 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31872 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
31873 "vpblendvb", 1, Iop_SarN8x16 );
31874 *uses_vvvv = True;
31875 goto decode_success;
31877 /* VPBLENDVB ymmG, ymmE/memE, ymmV, ymmIS4
31878 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
31879 /* VPBLENDVB = VEX.NDS.256.66.0F3A.WIG 4C /r /is4 */
31880 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31881 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
31882 "vpblendvb", 1, Iop_SarN8x16 );
31883 *uses_vvvv = True;
31884 goto decode_success;
31886 break;
31888 case 0x60:
31889 case 0x61:
31890 case 0x62:
31891 case 0x63:
31892 /* VEX.128.66.0F3A.WIG 63 /r ib = VPCMPISTRI imm8, xmm2/m128, xmm1
31893 VEX.128.66.0F3A.WIG 62 /r ib = VPCMPISTRM imm8, xmm2/m128, xmm1
31894 VEX.128.66.0F3A.WIG 61 /r ib = VPCMPESTRI imm8, xmm2/m128, xmm1
31895 VEX.128.66.0F3A.WIG 60 /r ib = VPCMPESTRM imm8, xmm2/m128, xmm1
31896 (selected special cases that actually occur in glibc,
31897          not by any means a complete implementation.) */
31899 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31900 Long delta0 = delta;
31901 delta = dis_PCMPxSTRx( vbi, pfx, delta, True/*isAvx*/, opc );
31902 if (delta > delta0) goto decode_success;
31903          /* else fall through; dis_PCMPxSTRx failed to decode it */
31905 break;
31907 case 0x5C ... 0x5F:
31908 case 0x68 ... 0x6F:
31909 case 0x78 ... 0x7F:
31910 /* FIXME: list the instructions decoded here */
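/* Roughly, these 0F3A opcode ranges belong to the AMD FMA4 family:
   5C-5F are the VFMADDSUB/VFMSUBADD forms, 68-6F the VFMADD/VFMSUB
   forms and 78-7F the VFNMADD/VFNMSUB forms, all handed off to
   dis_FMA4 below. */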
31911 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31912 Long delta0 = delta;
31913 delta = dis_FMA4( pfx, delta, opc, uses_vvvv, vbi );
31914 if (delta > delta0) {
31915 dres->hint = Dis_HintVerbose;
31916 goto decode_success;
31918          /* else fall through; dis_FMA4 failed to decode it */
31920 break;
31922 case 0xDF:
31923 /* VAESKEYGENASSIST imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG DF /r */
31924 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31925          delta = dis_AESKEYGENASSIST( vbi, pfx, delta, True/*isAvx*/ );
31926 goto decode_success;
31928 break;
31930 case 0xF0:
31931 /* RORX imm8, r/m32, r32a = VEX.LZ.F2.0F3A.W0 F0 /r /i */
31932 /* RORX imm8, r/m64, r64a = VEX.LZ.F2.0F3A.W1 F0 /r /i */
31933 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
31934 Int size = getRexW(pfx) ? 8 : 4;
31935 IRType ty = szToITy(size);
31936 IRTemp src = newTemp(ty);
31937 UChar rm = getUChar(delta);
31938 UChar imm8;
31940 if (epartIsReg(rm)) {
31941 imm8 = getUChar(delta+1);
31942 assign( src, getIRegE(size,pfx,rm) );
31943 DIP("rorx %d,%s,%s\n", imm8, nameIRegE(size,pfx,rm),
31944 nameIRegG(size,pfx,rm));
31945 delta += 2;
31946 } else {
31947 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
31948 imm8 = getUChar(delta+alen);
31949 assign( src, loadLE(ty, mkexpr(addr)) );
31950 DIP("rorx %d,%s,%s\n", imm8, dis_buf, nameIRegG(size,pfx,rm));
31951 delta += alen + 1;
31953 imm8 &= 8*size-1;
31955          /* dst = (src >>u imm8) | (src << (8*size-imm8)) */
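/* For example, a 32-bit rorx with imm8 == 8 computes
   (src >>u 8) | (src << 24): a rotate right by 8 which, unlike ROR,
   leaves the flags untouched. */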
31956 putIRegG( size, pfx, rm,
31957 imm8 == 0 ? mkexpr(src)
31958 : binop( mkSizedOp(ty,Iop_Or8),
31959 binop( mkSizedOp(ty,Iop_Shr8), mkexpr(src),
31960 mkU8(imm8) ),
31961 binop( mkSizedOp(ty,Iop_Shl8), mkexpr(src),
31962 mkU8(8*size-imm8) ) ) );
31963 /* Flags aren't modified. */
31964 goto decode_success;
31966 break;
31968 default:
31969 break;
31973 //decode_failure:
31974 return deltaIN;
31976 decode_success:
31977 return delta;
31981 /*------------------------------------------------------------*/
31982 /*--- ---*/
31983 /*--- Disassemble a single instruction ---*/
31984 /*--- ---*/
31985 /*------------------------------------------------------------*/
31987 /* Disassemble a single instruction into IR. The instruction is
31988 located in host memory at &guest_code[delta]. */
31990 static
31991 DisResult disInstr_AMD64_WRK (
31992 /*OUT*/Bool* expect_CAS,
31993 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
31994 Bool resteerCisOk,
31995 void* callback_opaque,
31996 Long delta64,
31997 const VexArchInfo* archinfo,
31998 const VexAbiInfo* vbi,
31999 Bool sigill_diag
32002 IRTemp t1, t2;
32003 UChar pre;
32004 Int n, n_prefixes;
32005 DisResult dres;
32007 /* The running delta */
32008 Long delta = delta64;
32010    /* Holds the insn's starting delta, so that we can print
32011 consistent error messages for unimplemented insns. */
32012 Long delta_start = delta;
32014 /* sz denotes the nominal data-op size of the insn; we change it to
32015 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of
32016 conflict REX.W takes precedence. */
32017 Int sz = 4;
32019 /* pfx holds the summary of prefixes. */
32020 Prefix pfx = PFX_EMPTY;
32022 /* Holds the computed opcode-escape indication. */
32023 Escape esc = ESC_NONE;
32025 /* Set result defaults. */
32026 dres.whatNext = Dis_Continue;
32027 dres.len = 0;
32028 dres.continueAt = 0;
32029 dres.jk_StopHere = Ijk_INVALID;
32030 dres.hint = Dis_HintNone;
32031 *expect_CAS = False;
32033 vassert(guest_RIP_next_assumed == 0);
32034 vassert(guest_RIP_next_mustcheck == False);
32036 t1 = t2 = IRTemp_INVALID;
32038 DIP("\t0x%llx: ", guest_RIP_bbstart+delta);
32040 /* Spot "Special" instructions (see comment at top of file). */
32042 const UChar* code = guest_code + delta;
32043 /* Spot the 16-byte preamble:
32044 48C1C703 rolq $3, %rdi
32045 48C1C70D rolq $13, %rdi
32046 48C1C73D rolq $61, %rdi
32047       48C1C733   rolq $51, %rdi */
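/* Note that the four rotate amounts (3+13+61+51) sum to 128, which is
   0 mod 64, so the preamble as a whole leaves %rdi unchanged and is
   harmless if executed natively. */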
32049 if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7
32050 && code[ 3] == 0x03 &&
32051 code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7
32052 && code[ 7] == 0x0D &&
32053 code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7
32054 && code[11] == 0x3D &&
32055 code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7
32056 && code[15] == 0x33) {
32057 /* Got a "Special" instruction preamble. Which one is it? */
32058 if (code[16] == 0x48 && code[17] == 0x87
32059 && code[18] == 0xDB /* xchgq %rbx,%rbx */) {
32060 /* %RDX = client_request ( %RAX ) */
32061 DIP("%%rdx = client_request ( %%rax )\n");
32062 delta += 19;
32063 jmp_lit(&dres, Ijk_ClientReq, guest_RIP_bbstart+delta);
32064 vassert(dres.whatNext == Dis_StopHere);
32065 goto decode_success;
32067 else
32068 if (code[16] == 0x48 && code[17] == 0x87
32069 && code[18] == 0xC9 /* xchgq %rcx,%rcx */) {
32070 /* %RAX = guest_NRADDR */
32071 DIP("%%rax = guest_NRADDR\n");
32072 delta += 19;
32073 putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
32074 goto decode_success;
32076 else
32077 if (code[16] == 0x48 && code[17] == 0x87
32078 && code[18] == 0xD2 /* xchgq %rdx,%rdx */) {
32079 /* call-noredir *%RAX */
32080 DIP("call-noredir *%%rax\n");
32081 delta += 19;
32082 t1 = newTemp(Ity_I64);
32083 assign(t1, getIRegRAX(8));
32084 t2 = newTemp(Ity_I64);
32085 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
32086 putIReg64(R_RSP, mkexpr(t2));
32087 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta));
32088 jmp_treg(&dres, Ijk_NoRedir, t1);
32089 vassert(dres.whatNext == Dis_StopHere);
32090 goto decode_success;
32092 else
32093 if (code[16] == 0x48 && code[17] == 0x87
32094 && code[18] == 0xff /* xchgq %rdi,%rdi */) {
32095 /* IR injection */
32096 DIP("IR injection\n");
32097 vex_inject_ir(irsb, Iend_LE);
32099 // Invalidate the current insn. The reason is that the IRop we're
32100 // injecting here can change. In which case the translation has to
32101 // be redone. For ease of handling, we simply invalidate all the
32102 // time.
32103 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_RIP_curr_instr)));
32104 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(19)));
32106 delta += 19;
32108 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
32109 dres.whatNext = Dis_StopHere;
32110 dres.jk_StopHere = Ijk_InvalICache;
32111 goto decode_success;
32113 /* We don't know what it is. */
32114 goto decode_failure;
32115 /*NOTREACHED*/
32119 /* Eat prefixes, summarising the result in pfx and sz, and rejecting
32120 as many invalid combinations as possible. */
32121 n_prefixes = 0;
32122 while (True) {
32123 if (n_prefixes > 7) goto decode_failure;
32124 pre = getUChar(delta);
32125 switch (pre) {
32126 case 0x66: pfx |= PFX_66; break;
32127 case 0x67: pfx |= PFX_ASO; break;
32128 case 0xF2: pfx |= PFX_F2; break;
32129 case 0xF3: pfx |= PFX_F3; break;
32130 case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break;
32131 case 0x2E: pfx |= PFX_CS; break;
32132 case 0x3E: pfx |= PFX_DS; break;
32133 case 0x26: pfx |= PFX_ES; break;
32134 case 0x64: pfx |= PFX_FS; break;
32135 case 0x65: pfx |= PFX_GS; break;
32136 case 0x36: pfx |= PFX_SS; break;
32137 case 0x40 ... 0x4F:
32138 pfx |= PFX_REX;
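/* A REX byte is 0100WRXB: bit 3 = W, bit 2 = R, bit 1 = X, bit 0 = B. */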
32139 if (pre & (1<<3)) pfx |= PFX_REXW;
32140 if (pre & (1<<2)) pfx |= PFX_REXR;
32141 if (pre & (1<<1)) pfx |= PFX_REXX;
32142 if (pre & (1<<0)) pfx |= PFX_REXB;
32143 break;
32144 default:
32145 goto not_a_legacy_prefix;
32147 n_prefixes++;
32148 delta++;
32151 not_a_legacy_prefix:
32152 /* We've used up all the non-VEX prefixes. Parse and validate a
32153 VEX prefix if that's appropriate. */
32154 if (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX) {
32155 /* Used temporarily for holding VEX prefixes. */
32156 UChar vex0 = getUChar(delta);
32157 if (vex0 == 0xC4) {
32158 /* 3-byte VEX */
32159 UChar vex1 = getUChar(delta+1);
32160 UChar vex2 = getUChar(delta+2);
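/* In a 3-byte VEX prefix, byte 1 is R.X.B.m-mmmm (R, X and B stored
   inverted) and byte 2 is W.vvvv.L.pp (vvvv stored inverted); the
   bit-fishing below unpacks exactly those fields. */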
32161 delta += 3;
32162 pfx |= PFX_VEX;
32163 /* Snarf contents of byte 1 */
32164 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
32165 /* X */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_REXX;
32166 /* B */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_REXB;
32167 /* m-mmmm */
32168 switch (vex1 & 0x1F) {
32169 case 1: esc = ESC_0F; break;
32170 case 2: esc = ESC_0F38; break;
32171 case 3: esc = ESC_0F3A; break;
32172 /* Any other m-mmmm field will #UD */
32173 default: goto decode_failure;
32175 /* Snarf contents of byte 2 */
32176 /* W */ pfx |= (vex2 & (1<<7)) ? PFX_REXW : 0;
32177 /* ~v3 */ pfx |= (vex2 & (1<<6)) ? 0 : PFX_VEXnV3;
32178 /* ~v2 */ pfx |= (vex2 & (1<<5)) ? 0 : PFX_VEXnV2;
32179 /* ~v1 */ pfx |= (vex2 & (1<<4)) ? 0 : PFX_VEXnV1;
32180 /* ~v0 */ pfx |= (vex2 & (1<<3)) ? 0 : PFX_VEXnV0;
32181 /* L */ pfx |= (vex2 & (1<<2)) ? PFX_VEXL : 0;
32182 /* pp */
32183 switch (vex2 & 3) {
32184 case 0: break;
32185 case 1: pfx |= PFX_66; break;
32186 case 2: pfx |= PFX_F3; break;
32187 case 3: pfx |= PFX_F2; break;
32188 default: vassert(0);
32191 else if (vex0 == 0xC5) {
32192 /* 2-byte VEX */
32193 UChar vex1 = getUChar(delta+1);
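/* In a 2-byte VEX prefix the single payload byte is R.vvvv.L.pp
   (R and vvvv stored inverted); X, B and W are not present and
   behave as zero, and the opcode map is implicitly 0F. */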
32194 delta += 2;
32195 pfx |= PFX_VEX;
32196 /* Snarf contents of byte 1 */
32197 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
32198 /* ~v3 */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_VEXnV3;
32199 /* ~v2 */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_VEXnV2;
32200 /* ~v1 */ pfx |= (vex1 & (1<<4)) ? 0 : PFX_VEXnV1;
32201 /* ~v0 */ pfx |= (vex1 & (1<<3)) ? 0 : PFX_VEXnV0;
32202 /* L */ pfx |= (vex1 & (1<<2)) ? PFX_VEXL : 0;
32203 /* pp */
32204 switch (vex1 & 3) {
32205 case 0: break;
32206 case 1: pfx |= PFX_66; break;
32207 case 2: pfx |= PFX_F3; break;
32208 case 3: pfx |= PFX_F2; break;
32209 default: vassert(0);
32211 /* implied: */
32212 esc = ESC_0F;
32214 /* Can't have both VEX and REX */
32215 if ((pfx & PFX_VEX) && (pfx & PFX_REX))
32216 goto decode_failure; /* can't have both */
32219    /* Reject invalid prefix combinations */
32220 n = 0;
32221 if (pfx & PFX_F2) n++;
32222 if (pfx & PFX_F3) n++;
32223 if (n > 1)
32224 goto decode_failure; /* can't have both */
32226 n = 0;
32227 if (pfx & PFX_CS) n++;
32228 if (pfx & PFX_DS) n++;
32229 if (pfx & PFX_ES) n++;
32230 if (pfx & PFX_FS) n++;
32231 if (pfx & PFX_GS) n++;
32232 if (pfx & PFX_SS) n++;
32233 if (n > 1)
32234 goto decode_failure; /* multiple seg overrides == illegal */
32236 /* We have a %fs prefix. Reject it if there's no evidence in 'vbi'
32237 that we should accept it. */
32238 if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_const)
32239 goto decode_failure;
32241 /* Ditto for %gs prefixes. */
32242 if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_const)
32243 goto decode_failure;
32245 /* Set up sz. */
32246 sz = 4;
32247 if (pfx & PFX_66) sz = 2;
32248 if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8;
32250 /* Now we should be looking at the primary opcode byte or the
32251 leading escapes. Check that any LOCK prefix is actually
32252 allowed. */
32253 if (haveLOCK(pfx)) {
32254 if (can_be_used_with_LOCK_prefix( &guest_code[delta] )) {
32255 DIP("lock ");
32256 } else {
32257 *expect_CAS = False;
32258 goto decode_failure;
32262 /* Eat up opcode escape bytes, until we're really looking at the
32263 primary opcode byte. But only if there's no VEX present. */
32264 if (!(pfx & PFX_VEX)) {
32265 vassert(esc == ESC_NONE);
32266 pre = getUChar(delta);
32267 if (pre == 0x0F) {
32268 delta++;
32269 pre = getUChar(delta);
32270 switch (pre) {
32271 case 0x38: esc = ESC_0F38; delta++; break;
32272 case 0x3A: esc = ESC_0F3A; delta++; break;
32273 default: esc = ESC_0F; break;
32278 /* So now we're really really looking at the primary opcode
32279 byte. */
32280 Long delta_at_primary_opcode = delta;
32282 if (!(pfx & PFX_VEX)) {
32283 /* Handle non-VEX prefixed instructions. "Legacy" (non-VEX) SSE
32284 instructions preserve the upper 128 bits of YMM registers;
32285 iow we can simply ignore the presence of the upper halves of
32286 these registers. */
32287 switch (esc) {
32288 case ESC_NONE:
32289 delta = dis_ESC_NONE( &dres, expect_CAS,
32290 resteerOkFn, resteerCisOk, callback_opaque,
32291 archinfo, vbi, pfx, sz, delta );
32292 break;
32293 case ESC_0F:
32294 delta = dis_ESC_0F ( &dres, expect_CAS,
32295 resteerOkFn, resteerCisOk, callback_opaque,
32296 archinfo, vbi, pfx, sz, delta );
32297 break;
32298 case ESC_0F38:
32299 delta = dis_ESC_0F38( &dres,
32300 resteerOkFn, resteerCisOk, callback_opaque,
32301 archinfo, vbi, pfx, sz, delta );
32302 break;
32303 case ESC_0F3A:
32304 delta = dis_ESC_0F3A( &dres,
32305 resteerOkFn, resteerCisOk, callback_opaque,
32306 archinfo, vbi, pfx, sz, delta );
32307 break;
32308 default:
32309 vassert(0);
32311 } else {
32312 /* VEX prefixed instruction */
32313 /* Sloppy Intel wording: "An instruction encoded with a VEX.128
32314 prefix that loads a YMM register operand ..." zeroes out bits
32315 128 and above of the register. */
32316 Bool uses_vvvv = False;
32317 switch (esc) {
32318 case ESC_0F:
32319 delta = dis_ESC_0F__VEX ( &dres, &uses_vvvv,
32320 resteerOkFn, resteerCisOk,
32321 callback_opaque,
32322 archinfo, vbi, pfx, sz, delta );
32323 break;
32324 case ESC_0F38:
32325 delta = dis_ESC_0F38__VEX ( &dres, &uses_vvvv,
32326 resteerOkFn, resteerCisOk,
32327 callback_opaque,
32328 archinfo, vbi, pfx, sz, delta );
32329 break;
32330 case ESC_0F3A:
32331 delta = dis_ESC_0F3A__VEX ( &dres, &uses_vvvv,
32332 resteerOkFn, resteerCisOk,
32333 callback_opaque,
32334 archinfo, vbi, pfx, sz, delta );
32335 break;
32336 case ESC_NONE:
32337 /* The presence of a VEX prefix, by Intel definition,
32338 always implies at least an 0F escape. */
32339 goto decode_failure;
32340 default:
32341 vassert(0);
32343 /* If the insn doesn't use VEX.vvvv then it must be all ones.
32344 Check this. */
32345 if (!uses_vvvv) {
32346 if (getVexNvvvv(pfx) != 0)
32347 goto decode_failure;
32351 vassert(delta - delta_at_primary_opcode >= 0);
32352 vassert(delta - delta_at_primary_opcode < 16/*let's say*/);
32354 /* Use delta == delta_at_primary_opcode to denote decode failure.
32355 This implies that any successful decode must use at least one
32356 byte up. */
32357 if (delta == delta_at_primary_opcode)
32358 goto decode_failure;
32359 else
32360 goto decode_success; /* \o/ */
32363 decode_failure:
32364 /* All decode failures end up here. */
32365 if (sigill_diag) {
32366 vex_printf("vex amd64->IR: unhandled instruction bytes: "
32367 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
32368 getUChar(delta_start+0),
32369 getUChar(delta_start+1),
32370 getUChar(delta_start+2),
32371 getUChar(delta_start+3),
32372 getUChar(delta_start+4),
32373 getUChar(delta_start+5),
32374 getUChar(delta_start+6),
32375 getUChar(delta_start+7),
32376 getUChar(delta_start+8),
32377 getUChar(delta_start+9) );
32378 vex_printf("vex amd64->IR: REX=%d REX.W=%d REX.R=%d REX.X=%d REX.B=%d\n",
32379 haveREX(pfx) ? 1 : 0, getRexW(pfx), getRexR(pfx),
32380 getRexX(pfx), getRexB(pfx));
32381 vex_printf("vex amd64->IR: VEX=%d VEX.L=%d VEX.nVVVV=0x%x ESC=%s\n",
32382 haveVEX(pfx) ? 1 : 0, getVexL(pfx),
32383 getVexNvvvv(pfx),
32384 esc==ESC_NONE ? "NONE" :
32385 esc==ESC_0F ? "0F" :
32386 esc==ESC_0F38 ? "0F38" :
32387 esc==ESC_0F3A ? "0F3A" : "???");
32388 vex_printf("vex amd64->IR: PFX.66=%d PFX.F2=%d PFX.F3=%d\n",
32389 have66(pfx) ? 1 : 0, haveF2(pfx) ? 1 : 0,
32390 haveF3(pfx) ? 1 : 0);
32393 /* Tell the dispatcher that this insn cannot be decoded, and so has
32394 not been executed, and (is currently) the next to be executed.
32395       RIP should be up-to-date since it was made so at the start of each
32396 insn, but nevertheless be paranoid and update it again right
32397 now. */
32398 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
32399 jmp_lit(&dres, Ijk_NoDecode, guest_RIP_curr_instr);
32400 vassert(dres.whatNext == Dis_StopHere);
32401 dres.len = 0;
32402 /* We also need to say that a CAS is not expected now, regardless
32403 of what it might have been set to at the start of the function,
32404       since the IR that we've emitted just above (to synthesise a
32405 SIGILL) does not involve any CAS, and presumably no other IR has
32406 been emitted for this (non-decoded) insn. */
32407 *expect_CAS = False;
32408 return dres;
32411 decode_success:
32412 /* All decode successes end up here. */
32413 switch (dres.whatNext) {
32414 case Dis_Continue:
32415 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
32416 break;
32417 case Dis_ResteerU:
32418 case Dis_ResteerC:
32419 stmt( IRStmt_Put( OFFB_RIP, mkU64(dres.continueAt) ) );
32420 break;
32421 case Dis_StopHere:
32422 break;
32423 default:
32424 vassert(0);
32427 DIP("\n");
32428 dres.len = toUInt(delta - delta_start);
32429 return dres;
32432 #undef DIP
32433 #undef DIS
32436 /*------------------------------------------------------------*/
32437 /*--- Top-level fn ---*/
32438 /*------------------------------------------------------------*/
32440 /* Disassemble a single instruction into IR. The instruction
32441 is located in host memory at &guest_code[delta]. */
32443 DisResult disInstr_AMD64 ( IRSB* irsb_IN,
32444 Bool (*resteerOkFn) ( void*, Addr ),
32445 Bool resteerCisOk,
32446 void* callback_opaque,
32447 const UChar* guest_code_IN,
32448 Long delta,
32449 Addr guest_IP,
32450 VexArch guest_arch,
32451 const VexArchInfo* archinfo,
32452 const VexAbiInfo* abiinfo,
32453 VexEndness host_endness_IN,
32454 Bool sigill_diag_IN )
32456 Int i, x1, x2;
32457 Bool expect_CAS, has_CAS;
32458 DisResult dres;
32460 /* Set globals (see top of this file) */
32461 vassert(guest_arch == VexArchAMD64);
32462 guest_code = guest_code_IN;
32463 irsb = irsb_IN;
32464 host_endness = host_endness_IN;
32465 guest_RIP_curr_instr = guest_IP;
32466 guest_RIP_bbstart = guest_IP - delta;
32468 /* We'll consult these after doing disInstr_AMD64_WRK. */
32469 guest_RIP_next_assumed = 0;
32470 guest_RIP_next_mustcheck = False;
32472 x1 = irsb_IN->stmts_used;
32473 expect_CAS = False;
32474 dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
32475 resteerCisOk,
32476 callback_opaque,
32477 delta, archinfo, abiinfo, sigill_diag_IN );
32478 x2 = irsb_IN->stmts_used;
32479 vassert(x2 >= x1);
32481 /* If disInstr_AMD64_WRK tried to figure out the next rip, check it
32482 got it right. Failure of this assertion is serious and denotes
32483 a bug in disInstr. */
32484 if (guest_RIP_next_mustcheck
32485 && guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) {
32486 vex_printf("\n");
32487 vex_printf("assumed next %%rip = 0x%llx\n",
32488 guest_RIP_next_assumed );
32489 vex_printf(" actual next %%rip = 0x%llx\n",
32490 guest_RIP_curr_instr + dres.len );
32491 vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
32494 /* See comment at the top of disInstr_AMD64_WRK for meaning of
32495 expect_CAS. Here, we (sanity-)check for the presence/absence of
32496 IRCAS as directed by the returned expect_CAS value. */
32497 has_CAS = False;
32498 for (i = x1; i < x2; i++) {
32499 if (irsb_IN->stmts[i]->tag == Ist_CAS)
32500 has_CAS = True;
32503 if (expect_CAS != has_CAS) {
32504 /* inconsistency detected. re-disassemble the instruction so as
32505 to generate a useful error message; then assert. */
32506 vex_traceflags |= VEX_TRACE_FE;
32507 dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
32508 resteerCisOk,
32509 callback_opaque,
32510 delta, archinfo, abiinfo, sigill_diag_IN );
32511 for (i = x1; i < x2; i++) {
32512 vex_printf("\t\t");
32513 ppIRStmt(irsb_IN->stmts[i]);
32514 vex_printf("\n");
32516 /* Failure of this assertion is serious and denotes a bug in
32517 disInstr. */
32518 vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
32521 return dres;
32525 /*------------------------------------------------------------*/
32526 /*--- Unused stuff ---*/
32527 /*------------------------------------------------------------*/
32529 // A potentially more Memcheck-friendly version of gen_LZCNT, if
32530 // this should ever be needed.
32532 //static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
32534 // /* Scheme is simple: propagate the most significant 1-bit into all
32535 // lower positions in the word. This gives a word of the form
32536 // 0---01---1. Now invert it, giving a word of the form
32537 // 1---10---0, then do a population-count idiom (to count the 1s,
32538 // which is the number of leading zeroes, or the word size if the
32539 //    original word was 0).
32540 // */
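// For example, with an 8-bit word (shown with 8 bits only for brevity)
// 0b00010110: propagating the topmost 1 gives 0b00011111, inverting gives
// 0b11100000, and the popcount of that is 3, which is indeed the number
// of leading zeroes.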
32541 // Int i;
32542 // IRTemp t[7];
32543 // for (i = 0; i < 7; i++) {
32544 // t[i] = newTemp(ty);
32545 // }
32546 // if (ty == Ity_I64) {
32547 // assign(t[0], binop(Iop_Or64, mkexpr(src),
32548 // binop(Iop_Shr64, mkexpr(src), mkU8(1))));
32549 // assign(t[1], binop(Iop_Or64, mkexpr(t[0]),
32550 // binop(Iop_Shr64, mkexpr(t[0]), mkU8(2))));
32551 // assign(t[2], binop(Iop_Or64, mkexpr(t[1]),
32552 // binop(Iop_Shr64, mkexpr(t[1]), mkU8(4))));
32553 // assign(t[3], binop(Iop_Or64, mkexpr(t[2]),
32554 // binop(Iop_Shr64, mkexpr(t[2]), mkU8(8))));
32555 // assign(t[4], binop(Iop_Or64, mkexpr(t[3]),
32556 // binop(Iop_Shr64, mkexpr(t[3]), mkU8(16))));
32557 // assign(t[5], binop(Iop_Or64, mkexpr(t[4]),
32558 // binop(Iop_Shr64, mkexpr(t[4]), mkU8(32))));
32559 // assign(t[6], unop(Iop_Not64, mkexpr(t[5])));
32560 // return gen_POPCOUNT(ty, t[6]);
32561 // }
32562 // if (ty == Ity_I32) {
32563 // assign(t[0], binop(Iop_Or32, mkexpr(src),
32564 // binop(Iop_Shr32, mkexpr(src), mkU8(1))));
32565 // assign(t[1], binop(Iop_Or32, mkexpr(t[0]),
32566 // binop(Iop_Shr32, mkexpr(t[0]), mkU8(2))));
32567 // assign(t[2], binop(Iop_Or32, mkexpr(t[1]),
32568 // binop(Iop_Shr32, mkexpr(t[1]), mkU8(4))));
32569 // assign(t[3], binop(Iop_Or32, mkexpr(t[2]),
32570 // binop(Iop_Shr32, mkexpr(t[2]), mkU8(8))));
32571 // assign(t[4], binop(Iop_Or32, mkexpr(t[3]),
32572 // binop(Iop_Shr32, mkexpr(t[3]), mkU8(16))));
32573 // assign(t[5], unop(Iop_Not32, mkexpr(t[4])));
32574 // return gen_POPCOUNT(ty, t[5]);
32575 // }
32576 // if (ty == Ity_I16) {
32577 // assign(t[0], binop(Iop_Or16, mkexpr(src),
32578 // binop(Iop_Shr16, mkexpr(src), mkU8(1))));
32579 // assign(t[1], binop(Iop_Or16, mkexpr(t[0]),
32580 // binop(Iop_Shr16, mkexpr(t[0]), mkU8(2))));
32581 // assign(t[2], binop(Iop_Or16, mkexpr(t[1]),
32582 // binop(Iop_Shr16, mkexpr(t[1]), mkU8(4))));
32583 // assign(t[3], binop(Iop_Or16, mkexpr(t[2]),
32584 // binop(Iop_Shr16, mkexpr(t[2]), mkU8(8))));
32585 // assign(t[4], unop(Iop_Not16, mkexpr(t[3])));
32586 // return gen_POPCOUNT(ty, t[4]);
32587 // }
32588 // vassert(0);
32592 /*--------------------------------------------------------------------*/
32593 /*--- end guest_amd64_toIR.c ---*/
32594 /*--------------------------------------------------------------------*/